diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -70,6 +70,14 @@
                              [ImmArg<ArgIndex<1>>]>;
   def int_x86_atomic_btr : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
                              [ImmArg<ArgIndex<1>>]>;
+  def int_x86_atomic_bts_rm : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty],
+                                        []>;
+  def int_x86_atomic_btc_rm : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty],
+                                        []>;
+  def int_x86_atomic_btr_rm : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty],
+                                        []>;
+
+
 }
 
 // Lock binary arith with CC.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -799,6 +799,9 @@
     LBTS,
     LBTC,
     LBTR,
+    LBTS_RM,
+    LBTC_RM,
+    LBTR_RM,
 
     /// RAO arithmetic instructions.
     /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5642,6 +5642,9 @@
     return true;
   case Intrinsic::x86_cmpccxadd32:
   case Intrinsic::x86_cmpccxadd64:
+  case Intrinsic::x86_atomic_bts_rm:
+  case Intrinsic::x86_atomic_btc_rm:
+  case Intrinsic::x86_atomic_btr_rm:
   case Intrinsic::x86_atomic_bts:
   case Intrinsic::x86_atomic_btc:
   case Intrinsic::x86_atomic_btr: {
@@ -5654,6 +5657,7 @@
                      MachineMemOperand::MOVolatile;
     return true;
   }
+
   case Intrinsic::x86_aadd32:
   case Intrinsic::x86_aadd64:
   case Intrinsic::x86_aand32:
@@ -28352,6 +28356,25 @@
     return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
                        Operation.getValue(1));
   }
+  case Intrinsic::x86_atomic_bts_rm:
+  case Intrinsic::x86_atomic_btc_rm:
+  case Intrinsic::x86_atomic_btr_rm: {
+    SDLoc DL(Op);
+    MVT VT = Op.getSimpleValueType();
+    SDValue Chain = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(2);
+    SDValue Op2 = Op.getOperand(3);
+    unsigned Opc = IntNo == Intrinsic::x86_atomic_bts_rm   ? X86ISD::LBTS_RM
+                   : IntNo == Intrinsic::x86_atomic_btc_rm ? X86ISD::LBTC_RM
+                                                           : X86ISD::LBTR_RM;
+    MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
+    SDValue Res =
+        DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::Other),
+                                {Chain, Op1, Op2}, VT, MMO);
+    Chain = Res.getValue(1);
+    Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT);
+    return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain);
+  }
   case Intrinsic::x86_atomic_bts:
   case Intrinsic::x86_atomic_btc:
   case Intrinsic::x86_atomic_btr: {
@@ -28363,6 +28386,7 @@
     unsigned Opc = IntNo == Intrinsic::x86_atomic_bts   ? X86ISD::LBTS
                    : IntNo == Intrinsic::x86_atomic_btc ? X86ISD::LBTC
                                                         : X86ISD::LBTR;
+    SDValue Size = DAG.getConstant(VT.getScalarSizeInBits(), DL, MVT::i32);
     MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
     SDValue Res =
@@ -31389,6 +31413,96 @@
       : AtomicExpansionKind::None;
 }
 
+enum BitTestKind : unsigned {
+  UndefBit,
+  ConstantBit,
+  NotConstantBit,
+  ShiftBit,
+  NotShiftBit
+};
+
+static std::pair<Value *, BitTestKind> FindSingleBitChange(Value *V) {
+  BitTestKind BTK = UndefBit;
+  auto *C = dyn_cast<ConstantInt>(V);
+  if (C) {
+    // Check if V is a power of 2 or a NOT of a power of 2.
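+    // E.g. (illustrative 16-bit constants, not from the patch): 0x0010 is a
+    // power of 2, so it maps to ConstantBit; 0xffef == ~0x0010 maps to
+    // NotConstantBit.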
+    if (isPowerOf2_64(C->getZExtValue())) {
+      BTK = ConstantBit;
+    } else if (isPowerOf2_64((~C->getValue()).getZExtValue())) {
+      BTK = NotConstantBit;
+    }
+    return {V, BTK};
+  }
+
+  // Check if V is some power of 2 pattern known to be non-zero.
+  auto *I = dyn_cast<Instruction>(V);
+  if (I) {
+    bool Not = false;
+    // Check if we have a NOT
+    if (I->getOpcode() == Instruction::Sub ||
+        I->getOpcode() == Instruction::Xor) {
+
+      auto *OpC0 = dyn_cast<ConstantInt>(I->getOperand(0));
+      auto *OpC1 = dyn_cast<ConstantInt>(I->getOperand(1));
+      // Check if this is a NOT instruction: -1 - X, X ^ -1, or -1 ^ X.
+      if (!OpC0 && (!OpC1 || I->getOpcode() == Instruction::Sub))
+        return {nullptr, UndefBit};
+
+      auto *MaybeNeg1 = OpC0 ? OpC0 : OpC1;
+      if (!MaybeNeg1->isMinusOne())
+        return {nullptr, UndefBit};
+
+      auto *OpI0 = dyn_cast<Instruction>(I->getOperand(0));
+      auto *OpI1 = dyn_cast<Instruction>(I->getOperand(1));
+
+      assert(OpI0 != nullptr || OpI1 != nullptr);
+      assert(OpI0 == nullptr || OpI1 == nullptr);
+
+      I = OpI0 ? OpI0 : OpI1;
+      Not = true;
+    }
+    // We can only use 1 << X without more sophisticated analysis. C << X where
+    // C is a power of 2 but not 1 can result in zero, which cannot be
+    // translated to a bit test. Likewise any C >> X (either arith or logical)
+    // can be zero.
+    if (I->getOpcode() == Instruction::Shl) {
+      // Todo(1): The cmpxchg case is pretty costly so matching `BLSI(X)`, `X &
+      // -X` and some other provable power of 2 patterns that we can use CTZ on
+      // may be profitable.
+      // Todo(2): It may be possible in some cases to prove that Shl(C, X) is
+      // non-zero even where C != 1. Likewise LShr(C, X) and AShr(C, X) may
+      // also be provably a non-zero power of 2.
+      // Todo(3): ROTL and ROTR patterns on a power of 2 C should also be
+      // transformable to a bit test.
+      auto *ShiftVal = dyn_cast<ConstantInt>(I->getOperand(0));
+      if (!ShiftVal)
+        return {nullptr, UndefBit};
+      if (ShiftVal->equalsInt(1))
+        BTK = Not ? NotShiftBit : ShiftBit;
+
+      if (BTK == UndefBit)
+        return {nullptr, UndefBit};
+
+      Value *BitV = I->getOperand(1);
+      if (auto *I1 = dyn_cast<Instruction>(BitV)) {
+        // Read past a shift-mask instruction to find the count.
+        if (I1->getOpcode() == Instruction::And) {
+          auto *OpC0 = dyn_cast<ConstantInt>(I1->getOperand(0));
+          auto *OpC1 = dyn_cast<ConstantInt>(I1->getOperand(1));
+          if (OpC0 || OpC1) {
+            assert(OpC0 == nullptr || OpC1 == nullptr);
+            auto *C1 = OpC0 ? OpC0 : OpC1;
+            if (C1->equalsInt(I->getType()->getPrimitiveSizeInBits() - 1))
+              BitV = OpC0 ? I1->getOperand(1) : I1->getOperand(0);
+          }
+        }
+      }
+
+      return {BitV, BTK};
+    }
+  }
+  return {nullptr, UndefBit};
+}
+
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
   // If the atomicrmw's result isn't actually used, we can just add a "lock"
@@ -31398,51 +31512,138 @@
 
   // If the atomicrmw's result is used by a single bit AND, we may use
   // bts/btr/btc instruction for these operations.
-  auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
+  // Note: InstCombinePass can cause a de-optimization here. It replaces the
+  // SETCC(And(AtomicRMW(P, power_of_2), power_of_2)) with LShr and Xor
+  // (depending on CC). This pattern can only use bts/btr/btc but we don't
+  // detect it.
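+  // As an illustration (hypothetical IR, not from the patch), the
+  // post-InstCombine shape that we fail to match looks roughly like:
+  //   %old = atomicrmw or ptr %p, i32 8 monotonic
+  //   %shr = lshr i32 %old, 3
+  //   %bit = and i32 %shr, 1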
   Instruction *I = AI->user_back();
-  if (!C1 || !AI->hasOneUse() || I->getOpcode() != Instruction::And ||
+  auto BitChange = FindSingleBitChange(AI->getValOperand());
+  if (!BitChange.first || BitChange.second == UndefBit || !AI->hasOneUse() ||
+      I->getOpcode() != Instruction::And ||
+      AI->getType()->getPrimitiveSizeInBits() == 8 ||
       AI->getParent() != I->getParent())
     return AtomicExpansionKind::CmpXChg;
+
+  assert(I->getOperand(0) == AI);
 
   // The following instruction must be an AND with a single bit.
-  auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
-  unsigned Bits = AI->getType()->getPrimitiveSizeInBits();
-  if (!C2 || Bits == 8 || !isPowerOf2_64(C2->getZExtValue()))
+  if (BitChange.second == ConstantBit || BitChange.second == NotConstantBit) {
+    auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
+    assert(C1 != nullptr);
+    auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
+    if (!C2 || !isPowerOf2_64(C2->getZExtValue())) {
+      return AtomicExpansionKind::CmpXChg;
+    }
+    if (AI->getOperation() == AtomicRMWInst::And) {
+      return ~C1->getValue() == C2->getValue()
+                 ? AtomicExpansionKind::BitTestIntrinsic
+                 : AtomicExpansionKind::CmpXChg;
+    }
+    return C1 == C2 ? AtomicExpansionKind::BitTestIntrinsic
+                    : AtomicExpansionKind::CmpXChg;
+  }
+
+  assert(BitChange.second == ShiftBit || BitChange.second == NotShiftBit);
+
+  auto BitTested = FindSingleBitChange(I->getOperand(1));
+  if (BitTested.second != ShiftBit && BitTested.second != NotShiftBit)
+    return AtomicExpansionKind::CmpXChg;
+
+  assert(BitTested.first != nullptr);
+
+  // If the shift amounts are not the same, we can't use BitTestIntrinsic.
+  if (BitChange.first != BitTested.first)
     return AtomicExpansionKind::CmpXChg;
 
+  // An atomic AND needs to be masking all but one bit and testing the one bit
+  // that is unset in the mask.
   if (AI->getOperation() == AtomicRMWInst::And)
-    return ~C1->getValue() == C2->getValue()
+    return (BitChange.second == NotShiftBit && BitTested.second == ShiftBit)
                ? AtomicExpansionKind::BitTestIntrinsic
                : AtomicExpansionKind::CmpXChg;
 
-  return C1 == C2 ? AtomicExpansionKind::BitTestIntrinsic
-                  : AtomicExpansionKind::CmpXChg;
+  // An atomic XOR/OR needs to be setting and testing the same bit.
+  return (BitChange.second == ShiftBit && BitTested.second == ShiftBit)
+             ? AtomicExpansionKind::BitTestIntrinsic
+             : AtomicExpansionKind::CmpXChg;
 }
 
 void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
   IRBuilder<> Builder(AI);
-  Intrinsic::ID IID = Intrinsic::not_intrinsic;
+  Intrinsic::ID IID_C = Intrinsic::not_intrinsic;
+  Intrinsic::ID IID_I = Intrinsic::not_intrinsic;
   switch (AI->getOperation()) {
   default:
     llvm_unreachable("Unknown atomic operation");
   case AtomicRMWInst::Or:
-    IID = Intrinsic::x86_atomic_bts;
+    IID_C = Intrinsic::x86_atomic_bts;
+    IID_I = Intrinsic::x86_atomic_bts_rm;
     break;
   case AtomicRMWInst::Xor:
-    IID = Intrinsic::x86_atomic_btc;
+    IID_C = Intrinsic::x86_atomic_btc;
+    IID_I = Intrinsic::x86_atomic_btc_rm;
     break;
   case AtomicRMWInst::And:
-    IID = Intrinsic::x86_atomic_btr;
+    IID_C = Intrinsic::x86_atomic_btr;
+    IID_I = Intrinsic::x86_atomic_btr_rm;
     break;
   }
   Instruction *I = AI->user_back();
   LLVMContext &Ctx = AI->getContext();
-  unsigned Imm =
-      countTrailingZeros(cast<ConstantInt>(I->getOperand(1))->getZExtValue());
-  Function *BitTest =
-      Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
   Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(),
                                           Type::getInt8PtrTy(Ctx));
-  Value *Result = Builder.CreateCall(BitTest, {Addr, Builder.getInt8(Imm)});
+  Function *BitTest = nullptr;
+  Value *Result = nullptr;
+  auto BitTested = FindSingleBitChange(AI->getValOperand());
+  assert(BitTested.first != nullptr);
+  if (BitTested.second == ConstantBit || BitTested.second == NotConstantBit) {
+    auto *C = dyn_cast<ConstantInt>(I->getOperand(1));
+    assert(C != nullptr);
+
+    BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_C, AI->getType());
+
+    unsigned Imm = countTrailingZeros(C->getZExtValue());
+    Result = Builder.CreateCall(BitTest, {Addr, Builder.getInt8(Imm)});
+  } else {
+    BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_I, AI->getType());
+
+    assert(BitTested.second == ShiftBit || BitTested.second == NotShiftBit);
+
+    Value *SI = BitTested.first;
+    assert(SI != nullptr);
+
+    // BT{S|R|C} on a memory operand doesn't modulo the bit position, so we
+    // need to mask it.
+    unsigned ShiftBits = SI->getType()->getPrimitiveSizeInBits();
+    Value *BitPos =
+        Builder.CreateAnd(SI, Builder.getIntN(ShiftBits, ShiftBits - 1));
+    // Todo(1): In many cases it may be provable that SI is less than
+    // ShiftBits, in which case this mask is unnecessary.
+    // Todo(2): In the fairly idiomatic case of P[X / sizeof_bits(X)] OP 1
+    // << (X % sizeof_bits(X)) we can drop the shift mask and AGEN in
+    // favor of just a raw BT{S|R|C}.
+
+    Result = Builder.CreateCall(BitTest, {Addr, BitPos});
+    Result = Builder.CreateZExtOrTrunc(Result, AI->getType());
+
+    // If the result is only used for zero/non-zero status then we don't need
+    // to shift the value back. Otherwise do so.
+    for (auto It = I->user_begin(); It != I->user_end(); ++It) {
+      if (auto *ICmp = dyn_cast<ICmpInst>(*It)) {
+        if (ICmp->isEquality()) {
+          auto *C0 = dyn_cast<ConstantInt>(ICmp->getOperand(0));
+          auto *C1 = dyn_cast<ConstantInt>(ICmp->getOperand(1));
+          if (C0 || C1) {
+            assert(C0 == nullptr || C1 == nullptr);
+            if ((C0 ? C0 : C1)->isZero())
+              continue;
+          }
+        }
+      }
+      Result = Builder.CreateShl(Result, BitPos);
+      break;
+    }
+  }
+
   I->replaceAllUsesWith(Result);
   I->eraseFromParent();
   AI->eraseFromParent();
@@ -34215,6 +34416,9 @@
     NODE_NAME_CASE(LBTS)
     NODE_NAME_CASE(LBTC)
     NODE_NAME_CASE(LBTR)
+    NODE_NAME_CASE(LBTS_RM)
+    NODE_NAME_CASE(LBTC_RM)
+    NODE_NAME_CASE(LBTR_RM)
     NODE_NAME_CASE(AADD)
     NODE_NAME_CASE(AOR)
     NODE_NAME_CASE(AXOR)
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -869,6 +869,17 @@
 def x86btr : SDNode<"X86ISD::LBTR", X86LBTest,
                     [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
 
+def X86LBTestRM : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
+                                       SDTCisInt<2>]>;
+
+def x86_rm_bts : SDNode<"X86ISD::LBTS_RM", X86LBTestRM,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def x86_rm_btc : SDNode<"X86ISD::LBTC_RM", X86LBTestRM,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def x86_rm_btr : SDNode<"X86ISD::LBTR_RM", X86LBTestRM,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+
+
 multiclass ATOMIC_LOGIC_OP<Format Form, string s> {
   let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
       SchedRW = [WriteBitTestSetRegRMW] in {
@@ -887,10 +898,33 @@
   }
 }
 
+multiclass ATOMIC_LOGIC_OP_RM<bits<8> Opc8, string s> {
+  let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+      SchedRW = [WriteBitTestSetRegRMW] in {
+    def 16rm : Ii8<Opc8, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+                   !strconcat(s, "{w}\t{$src2, $src1|$src1, $src2}"),
+                   [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR16:$src2))]>,
+               OpSize16, TB, LOCK;
+    def 32rm : Ii8<Opc8, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+                   !strconcat(s, "{l}\t{$src2, $src1|$src1, $src2}"),
+                   [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR32:$src2))]>,
+               OpSize32, TB, LOCK;
+    def 64rm : RIi8<Opc8, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                    !strconcat(s, "{q}\t{$src2, $src1|$src1, $src2}"),
+                    [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR64:$src2))]>,
+                TB, LOCK;
+  }
+}
+
+
 defm LOCK_BTS : ATOMIC_LOGIC_OP<MRM5m, "bts">;
 defm LOCK_BTC : ATOMIC_LOGIC_OP<MRM7m, "btc">;
 defm LOCK_BTR : ATOMIC_LOGIC_OP<MRM6m, "btr">;
 
+defm LOCK_BTS_RM : ATOMIC_LOGIC_OP_RM<0xAB, "bts">;
+defm LOCK_BTC_RM : ATOMIC_LOGIC_OP_RM<0xBB, "btc">;
+defm LOCK_BTR_RM : ATOMIC_LOGIC_OP_RM<0xB3, "btr">;
+
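+// As an illustration (sketch, not part of the patch): the 32-bit instance
+// LOCK_BTS_RM32rm selects (x86_rm_bts addr:$src1, GR32:$src2) to
+// "lock btsl $src2, $src1", taking the bit position from a register and
+// leaving the original value of the tested bit in CF.
+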
 // Atomic compare and swap.
 multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
                           string mnemonic, SDPatternOperator frag> {
diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll
@@ -0,0 +1,56865 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-bmi2 < %s | FileCheck %s --check-prefixes=X86-NOBMI2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-bmi2 < %s | FileCheck %s --check-prefixes=X86-NOBMI2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=X86-BMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-bmi2 < %s | FileCheck %s --check-prefixes=X64-NOBMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-bmi2 < %s | FileCheck %s --check-prefixes=X64-NOBMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=X64-BMI2
+
+define i16 @atomic_shl1_xor_16_gpr_val(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_xor_16_gpr_val:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT:    movl $1, %edx
+; X86-NOBMI2-NEXT:    shll %cl, %edx
+; X86-NOBMI2-NEXT:    movzwl (%esi), %eax
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB0_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ecx
+; X86-NOBMI2-NEXT:    xorl %edx, %ecx
+; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT:    lock cmpxchgw %cx, (%esi)
+; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT:    jne .LBB0_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    andl %edx, %eax
+; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_xor_16_gpr_val:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    .cfi_offset %esi, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl $1, %ecx
+; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
+; X86-BMI2-NEXT:    movzwl (%edx), %eax
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB0_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %esi
+; X86-BMI2-NEXT:    xorl %ecx, %esi
+; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT:    lock cmpxchgw %si, (%edx)
+; X86-BMI2-NEXT:    # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT:    jne .LBB0_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    andl %ecx, %eax
+; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT:    popl %esi
+;
X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_16_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB0_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB0_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB0_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB0_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv5 = and i16 %0, %conv1 + ret i16 %conv5 +} + +define i16 @atomic_shl1_mask0_xor_16_gpr_val(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB1_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %edi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB1_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: 
.cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB1_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB1_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %ecx, %edx, %ecx +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB1_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB1_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB1_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB1_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %esi, %ecx, %ecx +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %2 = trunc i32 %shl4 to i16 + %conv5 = and i16 %1, %2 + ret i16 %conv5 +} + +define i16 @atomic_shl1_mask1_xor_16_gpr_val(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB2_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB2_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $cx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB2_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB2_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andb $15, %cl +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %ecx, %edx, %ecx +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB2_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB2_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; 
X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB2_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB2_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andb $15, %sil +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %esi, %ecx, %ecx +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %1 = and i16 %c, 15 + %shl4 = shl nuw i16 1, %1 + %and = and i16 %0, %shl4 + ret i16 %and +} + +define i16 @atomic_shl1_mask01_xor_16_gpr_val(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btcw %cx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btcw %cx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcw %cx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcw %si, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv7 = and i16 %1, %shl + ret i16 %conv7 +} + +define i16 @atomic_blsi_xor_16_gpr_val(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_xor_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: blsil %eax, %ecx +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB4_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: xorl %ecx, %esi +; X86-BMI2-NEXT: # kill: def $ax 
killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %si, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB4_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB4_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB4_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i16 0, %c + %and = and i16 %sub, %c + %0 = atomicrmw xor ptr %v, i16 %and monotonic, align 2 + %conv9 = and i16 %0, %and + ret i16 %conv9 +} + +define i16 @atomic_shl1_xor_16_gpr_valz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_16_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movzwl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB5_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %edx, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB5_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: testl %ecx, %edx +; X86-NOBMI2-NEXT: sete %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_16_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB5_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: xorl %edx, %esi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %si, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB5_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testl %ecx, %edx +; X86-BMI2-NEXT: sete %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: 
retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_16_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB5_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB5_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: testl %ecx, %edx +; X64-NOBMI2-NEXT: sete %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_16_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB5_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB5_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testl %edx, %ecx +; X64-BMI2-NEXT: sete %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv5 = zext i1 %tobool.not to i16 + ret i16 %conv5 +} + +define i16 @atomic_shl1_mask0_xor_16_gpr_valz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB6_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %edi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB6_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: btl %edx, %ecx +; X86-NOBMI2-NEXT: setae %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB6_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB6_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: btl %ecx, %edx +; X86-BMI2-NEXT: setae %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB6_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB6_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: btl %esi, %ecx +; X64-NOBMI2-NEXT: setae %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB6_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB6_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %ecx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: btl %esi, %ecx +; X64-BMI2-NEXT: setae %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv5 = zext i1 
%tobool.not to i16 + ret i16 %conv5 +} + +define i16 @atomic_shl1_mask1_xor_16_gpr_valz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB7_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB7_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %edx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: btl %ecx, %edx +; X86-NOBMI2-NEXT: setae %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB7_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB7_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: btl %ecx, %edx +; X86-BMI2-NEXT: setae %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB7_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB7_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %edx +; X64-NOBMI2-NEXT: 
andl $15, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: btl %ecx, %edx +; X64-NOBMI2-NEXT: setae %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB7_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB7_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %ecx +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: btl %esi, %ecx +; X64-BMI2-NEXT: setae %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %1 = and i16 %c, 15 + %sh_prom = zext i16 %1 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv5 = zext i1 %tobool.not to i16 + ret i16 %conv5 +} + +define i16 @atomic_shl1_mask01_xor_16_gpr_valz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB8_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %esi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB8_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: testl %ecx, %esi +; X86-NOBMI2-NEXT: sete %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB8_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: xorl %edx, %esi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %si, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne 
.LBB8_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testl %ecx, %edx +; X86-BMI2-NEXT: sete %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB8_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB8_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: testl %ecx, %edx +; X64-NOBMI2-NEXT: sete %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andb $15, %sil +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB8_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB8_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testl %edx, %ecx +; X64-BMI2-NEXT: sete %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %conv1 = trunc i32 %shl to i16 + %1 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv7 = zext i1 %tobool.not to i16 + ret i16 %conv7 +} + +define i16 @atomic_blsi_xor_16_gpr_valz(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_xor_16_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: blsil %eax, %ecx +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB9_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: xorl %ecx, %esi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %si, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB9_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testl %edx, %ecx +; X86-BMI2-NEXT: sete %al +; X86-BMI2-NEXT: 
# kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_16_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: blsil %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB9_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB9_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testl %edx, %ecx +; X64-BMI2-NEXT: sete %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %conv2 = trunc i32 %and to i16 + %0 = atomicrmw xor ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %0 to i32 + %and8 = and i32 %and, %conv3 + %tobool.not = icmp eq i32 %and8, 0 + %conv9 = zext i1 %tobool.not to i16 + ret i16 %conv9 +} + +define i16 @atomic_shl1_xor_16_gpr_valnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_16_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movzwl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB10_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %edx, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB10_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: testl %ecx, %edx +; X86-NOBMI2-NEXT: setne %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_16_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB10_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: xorl %edx, %esi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %si, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB10_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testl %ecx, %edx +; X86-BMI2-NEXT: setne %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; 
X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_16_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB10_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB10_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: testl %ecx, %edx +; X64-NOBMI2-NEXT: setne %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_16_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB10_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB10_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testl %edx, %ecx +; X64-BMI2-NEXT: setne %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %shl, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv6 = zext i1 %tobool to i16 + ret i16 %conv6 +} + +define i16 @atomic_shl1_mask0_xor_16_gpr_valnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB11_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %edi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB11_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: btl %edx, %ecx +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB11_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB11_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: btl %ecx, %edx +; X86-BMI2-NEXT: setb %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB11_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB11_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: btl %esi, %ecx +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB11_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB11_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %ecx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: btl %esi, %ecx +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 
+ %and = and i32 %shl4, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv6 = zext i1 %tobool to i16 + ret i16 %conv6 +} + +define i16 @atomic_shl1_mask1_xor_16_gpr_valnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB12_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB12_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %edx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: btl %ecx, %edx +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB12_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB12_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: btl %ecx, %edx +; X86-BMI2-NEXT: setb %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB12_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB12_1 +; 
X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %edx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: btl %ecx, %edx +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB12_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB12_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %ecx +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: btl %esi, %ecx +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %1 = and i16 %c, 15 + %sh_prom = zext i16 %1 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv6 = zext i1 %tobool to i16 + ret i16 %conv6 +} + +define i16 @atomic_shl1_mask01_xor_16_gpr_valnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB13_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %esi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB13_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: testl %ecx, %esi +; X86-NOBMI2-NEXT: setne %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB13_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: xorl %edx, %esi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: 
lock cmpxchgw %si, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB13_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testl %ecx, %edx +; X86-BMI2-NEXT: setne %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB13_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB13_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: testl %ecx, %edx +; X64-NOBMI2-NEXT: setne %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andb $15, %sil +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB13_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB13_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testl %edx, %ecx +; X64-BMI2-NEXT: setne %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %conv1 = trunc i32 %shl to i16 + %1 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv8 = zext i1 %tobool to i16 + ret i16 %conv8 +} + +define i16 @atomic_blsi_xor_16_gpr_valnz(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_xor_16_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: blsil %eax, %ecx +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB14_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: xorl %ecx, %esi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %si, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB14_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edx +; 
X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testl %edx, %ecx +; X86-BMI2-NEXT: setne %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_16_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: blsil %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB14_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB14_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testl %edx, %ecx +; X64-BMI2-NEXT: setne %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %conv2 = trunc i32 %and to i16 + %0 = atomicrmw xor ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %0 to i32 + %and8 = and i32 %and, %conv3 + %tobool = icmp ne i32 %and8, 0 + %conv10 = zext i1 %tobool to i16 + ret i16 %conv10 +} + +define i16 @atomic_shl1_xor_16_gpr_br(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_16_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB15_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB15_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: testl %eax, %esi +; X86-NOBMI2-NEXT: je .LBB15_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB15_5 +; X86-NOBMI2-NEXT: .LBB15_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB15_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_16_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB15_1: # 
%atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB15_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB15_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB15_5 +; X86-BMI2-NEXT: .LBB15_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB15_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_16_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB15_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB15_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: testl %eax, %edx +; X64-NOBMI2-NEXT: je .LBB15_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB15_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_16_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB15_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB15_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %ecx +; X64-BMI2-NEXT: je .LBB15_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB15_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %1 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask0_xor_16_gpr_br(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: 
atomic_shl1_mask0_xor_16_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ebx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB16_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %esi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB16_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: btl %ebx, %eax +; X86-NOBMI2-NEXT: jae .LBB16_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %bx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB16_5 +; X86-NOBMI2-NEXT: .LBB16_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB16_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB16_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB16_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: jae .LBB16_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB16_5 +; X86-BMI2-NEXT: .LBB16_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB16_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB16_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl 
%edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB16_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: btl %esi, %eax +; X64-NOBMI2-NEXT: jae .LBB16_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %si, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB16_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB16_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB16_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: jae .LBB16_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB16_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask1_xor_16_gpr_br(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB17_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB17_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: movl %ecx, %esi +; X86-NOBMI2-NEXT: andl $15, %esi +; X86-NOBMI2-NEXT: btl %esi, %eax +; X86-NOBMI2-NEXT: jae .LBB17_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: 
movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB17_5 +; X86-NOBMI2-NEXT: .LBB17_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB17_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB17_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB17_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: movl %edx, %esi +; X86-BMI2-NEXT: andl $15, %esi +; X86-BMI2-NEXT: btl %esi, %eax +; X86-BMI2-NEXT: jae .LBB17_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB17_5 +; X86-BMI2-NEXT: .LBB17_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB17_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB17_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB17_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: movl %ecx, %edx +; X64-NOBMI2-NEXT: andl $15, %edx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: jae .LBB17_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB17_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB17_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB17_1 +; 
X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: andl $15, %ecx +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: jae .LBB17_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB17_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %1 = and i16 %c, 15 + %sh_prom = zext i16 %1 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv3 = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv3 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask01_xor_16_gpr_br(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ebx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB18_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %esi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB18_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: testl %eax, %esi +; X86-NOBMI2-NEXT: je .LBB18_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %bx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB18_5 +; X86-NOBMI2-NEXT: .LBB18_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB18_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB18_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, 
%edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB18_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB18_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB18_5 +; X86-BMI2-NEXT: .LBB18_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB18_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB18_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB18_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: testl %eax, %edx +; X64-NOBMI2-NEXT: je .LBB18_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %si, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB18_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB18_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB18_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %ecx +; X64-BMI2-NEXT: je .LBB18_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB18_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %conv1 = trunc i32 %shl to i16 + %1 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_blsi_xor_16_gpr_br(ptr %v, i16 %c) { +; X86-BMI2-LABEL: 
atomic_blsi_xor_16_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB19_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB19_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB19_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB19_5 +; X86-BMI2-NEXT: .LBB19_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB19_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_16_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl %si, %ecx +; X64-BMI2-NEXT: blsil %ecx, %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB19_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: xorl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB19_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %edx +; X64-BMI2-NEXT: je .LBB19_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl (%rdi,%rcx,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB19_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %conv2 = trunc i32 %and to i16 + %0 = atomicrmw xor ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %0 to i32 + %and8 = and i32 %and, %conv3 + %tobool.not = icmp eq i32 %and8, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %1 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_xor_16_gpr_brz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_16_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; 
X86-NOBMI2-NEXT: .LBB20_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB20_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %edi +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: testl %edi, %esi +; X86-NOBMI2-NEXT: jne .LBB20_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: .LBB20_4: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_16_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB20_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB20_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edi +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: testl %edi, %esi +; X86-BMI2-NEXT: jne .LBB20_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB20_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_16_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB20_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB20_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %esi +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: testl %esi, %edx +; X64-NOBMI2-NEXT: je .LBB20_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB20_3: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; 
X64-BMI2-NEXT: .LBB20_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB20_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: testl %edx, %ecx +; X64-BMI2-NEXT: je .LBB20_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB20_3: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %1 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask0_xor_16_gpr_brz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ebx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB21_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %esi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB21_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: btl %ebx, %ecx +; X86-NOBMI2-NEXT: jb .LBB21_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movzwl %bx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: .LBB21_4: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: 
.LBB21_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB21_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %esi +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: btl %edx, %esi +; X86-BMI2-NEXT: jb .LBB21_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB21_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB21_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB21_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: btl %esi, %ecx +; X64-NOBMI2-NEXT: jae .LBB21_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB21_3: # %if.then +; X64-NOBMI2-NEXT: movzwl %si, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB21_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB21_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %ecx +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: btl %esi, %ecx +; X64-BMI2-NEXT: jae .LBB21_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB21_3: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} 
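+; atomic_shl1_mask1_xor_16_gpr_brz (below): the xor operand is the unmasked
+; bit (1 << c), while the post-atomicrmw test uses the masked index (c & 15).
+; As in the variants above, the i16 atomicrmw is expanded to a cmpxchg loop
+; and the zero test of the old value lowers to btl on the masked index.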
+ +define i16 @atomic_shl1_mask1_xor_16_gpr_brz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB22_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB22_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %esi +; X86-NOBMI2-NEXT: movl %ecx, %edi +; X86-NOBMI2-NEXT: andl $15, %edi +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: btl %edi, %esi +; X86-NOBMI2-NEXT: jb .LBB22_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: .LBB22_4: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB22_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB22_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %esi +; X86-BMI2-NEXT: movl %edx, %edi +; X86-BMI2-NEXT: andl $15, %edi +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: btl %edi, %esi +; X86-BMI2-NEXT: jb .LBB22_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB22_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB22_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 
+; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB22_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %edx +; X64-NOBMI2-NEXT: movl %ecx, %esi +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: btl %esi, %edx +; X64-NOBMI2-NEXT: jae .LBB22_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB22_3: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB22_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB22_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %ecx +; X64-BMI2-NEXT: movl %esi, %edx +; X64-BMI2-NEXT: andl $15, %edx +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: btl %edx, %ecx +; X64-BMI2-NEXT: jae .LBB22_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB22_3: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %1 = and i16 %c, 15 + %sh_prom = zext i16 %1 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv3 = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv3 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask01_xor_16_gpr_brz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ebx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB23_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %esi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB23_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: testl %ecx, 
%esi +; X86-NOBMI2-NEXT: jne .LBB23_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movzwl %bx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: .LBB23_4: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB23_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB23_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edi +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: testl %edi, %esi +; X86-BMI2-NEXT: jne .LBB23_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB23_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB23_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB23_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: testl %ecx, %edx +; X64-NOBMI2-NEXT: je .LBB23_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB23_3: # %if.then +; X64-NOBMI2-NEXT: movzwl %si, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB23_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: 
jne .LBB23_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: testl %edx, %ecx +; X64-BMI2-NEXT: je .LBB23_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB23_3: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %conv1 = trunc i32 %shl to i16 + %1 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_blsi_xor_16_gpr_brz(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_xor_16_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB24_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB24_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edi +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: testl %edi, %esi +; X86-BMI2-NEXT: jne .LBB24_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB24_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl %si, %ecx +; X64-BMI2-NEXT: blsil %ecx, %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB24_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: xorl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB24_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %esi +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: testl %esi, %edx +; X64-BMI2-NEXT: je .LBB24_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB24_3: # %if.then +; X64-BMI2-NEXT: movzwl (%rdi,%rcx,2), %eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %conv2 = trunc i32 %and to i16 + %0 = atomicrmw xor ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = 
zext i16 %0 to i32 + %and8 = and i32 %and, %conv3 + %tobool.not = icmp eq i32 %and8, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %1 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_xor_16_gpr_brnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_16_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB25_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB25_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: testl %eax, %esi +; X86-NOBMI2-NEXT: je .LBB25_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB25_5 +; X86-NOBMI2-NEXT: .LBB25_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB25_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB25_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB25_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB25_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB25_5 +; X86-BMI2-NEXT: .LBB25_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB25_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_16_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; 
X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB25_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB25_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: testl %eax, %edx +; X64-NOBMI2-NEXT: je .LBB25_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB25_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB25_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB25_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %ecx +; X64-BMI2-NEXT: je .LBB25_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB25_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %1 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask0_xor_16_gpr_brnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ebx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB26_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %esi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; 
X86-NOBMI2-NEXT: jne .LBB26_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: btl %ebx, %eax +; X86-NOBMI2-NEXT: jae .LBB26_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %bx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB26_5 +; X86-NOBMI2-NEXT: .LBB26_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB26_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB26_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB26_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: jae .LBB26_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB26_5 +; X86-BMI2-NEXT: .LBB26_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB26_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB26_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB26_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: btl %esi, %eax +; X64-NOBMI2-NEXT: jae .LBB26_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %si, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB26_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB26_1: # %atomicrmw.start +; 
X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB26_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: jae .LBB26_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB26_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw xor ptr %v, i16 %shl monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask1_xor_16_gpr_brnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB27_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB27_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: movl %ecx, %esi +; X86-NOBMI2-NEXT: andl $15, %esi +; X86-NOBMI2-NEXT: btl %esi, %eax +; X86-NOBMI2-NEXT: jae .LBB27_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB27_5 +; X86-NOBMI2-NEXT: .LBB27_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB27_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB27_1: # %atomicrmw.start +; 
X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB27_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: movl %edx, %esi +; X86-BMI2-NEXT: andl $15, %esi +; X86-BMI2-NEXT: btl %esi, %eax +; X86-BMI2-NEXT: jae .LBB27_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB27_5 +; X86-BMI2-NEXT: .LBB27_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB27_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB27_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB27_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: movl %ecx, %edx +; X64-NOBMI2-NEXT: andl $15, %edx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: jae .LBB27_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB27_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB27_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB27_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: andl $15, %ecx +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: jae .LBB27_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB27_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %1 = and i16 %c, 15 + %sh_prom = zext i16 %1 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv3 = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, 
ptr %v, i64 %conv3 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask01_xor_16_gpr_brnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ebx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB28_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %esi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB28_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: testl %eax, %esi +; X86-NOBMI2-NEXT: je .LBB28_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %bx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB28_5 +; X86-NOBMI2-NEXT: .LBB28_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB28_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB28_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB28_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB28_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB28_5 +; X86-BMI2-NEXT: .LBB28_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB28_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: 
movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB28_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB28_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: testl %eax, %edx +; X64-NOBMI2-NEXT: je .LBB28_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %si, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB28_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB28_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB28_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %ecx +; X64-BMI2-NEXT: je .LBB28_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB28_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %conv1 = trunc i32 %shl to i16 + %1 = atomicrmw xor ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_blsi_xor_16_gpr_brnz(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_xor_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB29_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB29_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; 
X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB29_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB29_5 +; X86-BMI2-NEXT: .LBB29_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB29_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl %si, %ecx +; X64-BMI2-NEXT: blsil %ecx, %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB29_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: xorl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB29_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %edx +; X64-BMI2-NEXT: je .LBB29_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl (%rdi,%rcx,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB29_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %conv2 = trunc i32 %and to i16 + %0 = atomicrmw xor ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %0 to i32 + %and8 = and i32 %and, %conv3 + %tobool.not = icmp eq i32 %and8, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %1 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_or_16_gpr_val(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_16_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movzwl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB30_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: orl %edx, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB30_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $1, %ecx +; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 
+; X86-BMI2-NEXT: .LBB30_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: orl %ecx, %esi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %si, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB30_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_16_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB30_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB30_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB30_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB30_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2 + %conv5 = and i16 %0, %conv1 + ret i16 %conv5 +} + +define i16 @atomic_shl1_mask0_or_16_gpr_val(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_16_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB31_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: orl %edi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB31_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: 
movl %edx, %ecx +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB31_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB31_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %ecx, %edx, %ecx +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_16_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB31_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB31_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB31_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB31_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %esi, %ecx, %ecx +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw or 
ptr %v, i16 %shl monotonic, align 2 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %2 = trunc i32 %shl4 to i16 + %conv5 = and i16 %1, %2 + ret i16 %conv5 +} + +define i16 @atomic_shl1_mask1_or_16_gpr_val(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_16_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB32_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB32_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $cx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB32_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB32_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andb $15, %cl +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %ecx, %edx, %ecx +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_16_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB32_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; 
X64-NOBMI2-NEXT: jne .LBB32_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB32_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB32_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andb $15, %sil +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %esi, %ecx, %ecx +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2 + %1 = and i16 %c, 15 + %shl4 = shl nuw i16 1, %1 + %and = and i16 %0, %shl4 + ret i16 %and +} + +define i16 @atomic_shl1_mask01_or_16_gpr_val(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_16_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btsw %cx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btsw %cx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_16_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsw %cx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsw %si, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw or ptr %v, i16 %shl monotonic, align 2 + %conv7 = and i16 %1, %shl + ret i16 %conv7 +} + +define i16 @atomic_blsi_or_16_gpr_val(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_or_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry 
+; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: blsil %eax, %ecx +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB34_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: orl %ecx, %esi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %si, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB34_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB34_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB34_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i16 0, %c + %and = and i16 %sub, %c + %0 = atomicrmw or ptr %v, i16 %and monotonic, align 2 + %conv9 = and i16 %0, %and + ret i16 %conv9 +} + +define i16 @atomic_shl1_or_16_gpr_valz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_16_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movzwl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB35_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: orl %edx, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB35_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: testl %ecx, %edx +; X86-NOBMI2-NEXT: sete %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_16_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB35_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: orl %edx, 
%esi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %si, (%ecx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB35_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %ecx
+; X86-BMI2-NEXT: xorl %eax, %eax
+; X86-BMI2-NEXT: testl %ecx, %edx
+; X86-BMI2-NEXT: sete %al
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_or_16_gpr_valz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT: shll %cl, %edx
+; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB35_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: orl %edx, %ecx
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi)
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NOBMI2-NEXT: jne .LBB35_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movzwl %ax, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: testl %ecx, %edx
+; X64-NOBMI2-NEXT: sete %al
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_or_16_gpr_valz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl $1, %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB35_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB35_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %edx
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: testl %edx, %ecx
+; X64-BMI2-NEXT: sete %al
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: retq
+entry:
+  %conv = zext i16 %c to i32
+  %shl = shl nuw i32 1, %conv
+  %conv1 = trunc i32 %shl to i16
+  %0 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2
+  %conv2 = zext i16 %0 to i32
+  %and = and i32 %shl, %conv2
+  %tobool.not = icmp eq i32 %and, 0
+  %conv5 = zext i1 %tobool.not to i16
+  ret i16 %conv5
+}
+
+define i16 @atomic_shl1_mask0_or_16_gpr_valz(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_16_gpr_valz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -8
+; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl %edx, %ecx
+; X86-NOBMI2-NEXT: andb $15, %cl
+; X86-NOBMI2-NEXT: movl $1, %edi
+; X86-NOBMI2-NEXT: shll %cl, %edi
+; X86-NOBMI2-NEXT: movzwl (%esi), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB36_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ecx
+; X86-NOBMI2-NEXT: orl %edi, %ecx
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi)
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT: jne .LBB36_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movzwl %ax, %ecx
+; X86-NOBMI2-NEXT: xorl %eax, %eax
+; X86-NOBMI2-NEXT: btl %edx, %ecx
+; X86-NOBMI2-NEXT: setae %al
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask0_or_16_gpr_valz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: .cfi_offset %esi, -12
+; X86-BMI2-NEXT: .cfi_offset %edi, -8
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movl %ecx, %eax
+; X86-BMI2-NEXT: andb $15, %al
+; X86-BMI2-NEXT: movl $1, %esi
+; X86-BMI2-NEXT: shlxl %eax, %esi, %esi
+; X86-BMI2-NEXT: movzwl (%edx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB36_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edi
+; X86-BMI2-NEXT: orl %esi, %edi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB36_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %edx
+; X86-BMI2-NEXT: xorl %eax, %eax
+; X86-BMI2-NEXT: btl %ecx, %edx
+; X86-BMI2-NEXT: setae %al
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_16_gpr_valz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: andb $15, %cl
+; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: shll %cl, %edx
+; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB36_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: orl %edx, %ecx
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi)
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NOBMI2-NEXT: jne .LBB36_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movzwl %ax, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: btl %esi, %ecx
+; X64-NOBMI2-NEXT: setae %al
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_mask0_or_16_gpr_valz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andb $15, %al
+; X64-BMI2-NEXT: movl $1, %ecx
+; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB36_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB36_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %ecx
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: btl %esi, %ecx
+; X64-BMI2-NEXT: setae %al
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: retq
+entry:
+  %0 = and i16 %c, 15
+  %shl = shl nuw i16 1, %0
+  %1 = atomicrmw or ptr %v, i16 %shl monotonic, align 2
+  %conv2 = zext i16 %1 to i32
+  %conv3 = zext i16 %c to i32
+  %shl4 = shl nuw i32 1, %conv3
+  %and = and i32 %shl4, %conv2
+  %tobool.not = icmp eq i32 %and, 0
+  %conv5 = zext i1 %tobool.not to i16
+  ret i16 %conv5
+}
+
+define i16 @atomic_shl1_mask1_or_16_gpr_valz(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_16_gpr_valz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -8
+; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movl $1, %esi
+; X86-NOBMI2-NEXT: shll %cl, %esi
+; X86-NOBMI2-NEXT: movzwl (%edx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB37_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %edi
+; X86-NOBMI2-NEXT: orl %esi, %edi
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx)
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT: jne .LBB37_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movzwl %ax, %edx
+; X86-NOBMI2-NEXT: andl $15, %ecx
+; X86-NOBMI2-NEXT: xorl %eax, %eax
+; X86-NOBMI2-NEXT: btl %ecx, %edx
+; X86-NOBMI2-NEXT: setae %al
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask1_or_16_gpr_valz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: .cfi_offset %esi, -12
+; X86-BMI2-NEXT: .cfi_offset %edi, -8
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movl $1, %eax
+; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi
+; X86-BMI2-NEXT: movzwl (%edx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB37_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edi
+; X86-BMI2-NEXT: orl %esi, %edi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB37_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %edx
+; X86-BMI2-NEXT: andl $15, %ecx
+; X86-BMI2-NEXT: xorl %eax, %eax
+; X86-BMI2-NEXT: btl %ecx, %edx
+; X86-BMI2-NEXT: setae %al
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_16_gpr_valz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: shll %cl, %edx
+; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB37_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %esi
+; X64-NOBMI2-NEXT: orl %edx, %esi
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi)
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NOBMI2-NEXT: jne .LBB37_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movzwl %ax, %edx
+; X64-NOBMI2-NEXT: andl $15, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: btl %ecx, %edx
+; X64-NOBMI2-NEXT: setae %al
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_mask1_or_16_gpr_valz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl $1, %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB37_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB37_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %ecx
+; X64-BMI2-NEXT: andl $15, %esi
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: btl %esi, %ecx
+; X64-BMI2-NEXT: setae %al
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: retq
+entry:
+  %conv = zext i16 %c to i32
+  %shl = shl nuw i32 1, %conv
+  %conv1 = trunc i32 %shl to i16
+  %0 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2
+  %conv2 = zext i16 %0 to i32
+  %1 = and i16 %c, 15
+  %sh_prom = zext i16 %1 to i32
+  %shl4 = shl nuw nsw i32 1, %sh_prom
+  %and = and i32 %shl4, %conv2
+  %tobool.not = icmp eq i32 %and, 0
+  %conv5 = zext i1 %tobool.not to i16
+  ret i16 %conv5
+}
+
+define i16 @atomic_shl1_mask01_or_16_gpr_valz(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_16_gpr_valz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: andb $15, %cl
+; X86-NOBMI2-NEXT: movl $1, %esi
+; X86-NOBMI2-NEXT: shll %cl, %esi
+; X86-NOBMI2-NEXT: movzwl (%edx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB38_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ecx
+; X86-NOBMI2-NEXT: orl %esi, %ecx
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx)
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT: jne .LBB38_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movzwl %ax, %ecx
+; X86-NOBMI2-NEXT: xorl %eax, %eax
+; X86-NOBMI2-NEXT: testl %ecx, %esi
+; X86-NOBMI2-NEXT: sete %al
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask01_or_16_gpr_valz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: .cfi_offset %esi, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andb $15, %al
+; X86-BMI2-NEXT: movl $1, %edx
+; X86-BMI2-NEXT: shlxl %eax, %edx, %edx
+; X86-BMI2-NEXT: movzwl (%ecx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB38_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %esi
+; X86-BMI2-NEXT: orl %edx, %esi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %si, (%ecx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB38_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %ecx
+; X86-BMI2-NEXT: xorl %eax, %eax
+; X86-BMI2-NEXT: testl %ecx, %edx
+; X86-BMI2-NEXT: sete %al
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_16_gpr_valz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: andb $15, %cl
+; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT: shll %cl, %edx
+; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB38_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: orl %edx, %ecx
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi)
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NOBMI2-NEXT: jne .LBB38_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movzwl %ax, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: testl %ecx, %edx
+; X64-NOBMI2-NEXT: sete %al
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_mask01_or_16_gpr_valz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: andb $15, %sil
+; X64-BMI2-NEXT: movl $1, %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB38_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB38_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %edx
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: testl %edx, %ecx
+; X64-BMI2-NEXT: sete %al
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: retq
+entry:
+  %0 = and i16 %c, 15
+  %sh_prom = zext i16 %0 to i32
+  %shl = shl nuw nsw i32 1, %sh_prom
+  %conv1 = trunc i32 %shl to i16
+  %1 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2
+  %conv2 = zext i16 %1 to i32
+  %and = and i32 %shl, %conv2
+  %tobool.not = icmp eq i32 %and, 0
+  %conv7 = zext i1 %tobool.not to i16
+  ret i16 %conv7
+}
+
+define i16 @atomic_blsi_or_16_gpr_valz(ptr %v, i16 %c) {
+; X86-BMI2-LABEL: atomic_blsi_or_16_gpr_valz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: .cfi_offset %esi, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: blsil %eax, %ecx
+; X86-BMI2-NEXT: movzwl (%edx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB39_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %esi
+; X86-BMI2-NEXT: orl %ecx, %esi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %si, (%edx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB39_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %edx
+; X86-BMI2-NEXT: xorl %eax, %eax
+; X86-BMI2-NEXT: testl %edx, %ecx
+; X86-BMI2-NEXT: sete %al
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-BMI2-LABEL: atomic_blsi_or_16_gpr_valz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movzwl %si, %eax
+; X64-BMI2-NEXT: blsil %eax, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB39_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB39_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %edx
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: testl %edx, %ecx
+; X64-BMI2-NEXT: sete %al
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: retq
+entry:
+  %conv = zext i16 %c to i32
+  %sub = sub nsw i32 0, %conv
+  %and = and i32 %conv, %sub
+  %conv2 = trunc i32 %and to i16
+  %0 = atomicrmw or ptr %v, i16 %conv2 monotonic, align 2
+  %conv3 = zext i16 %0 to i32
+  %and8 = and i32 %and, %conv3
+  %tobool.not = icmp eq i32 %and8, 0
+  %conv9 = zext i1 %tobool.not to i16
+  ret i16 %conv9
+}
+
+define i16 @atomic_shl1_or_16_gpr_valnz(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_or_16_gpr_valnz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl $1, %edx
+; X86-NOBMI2-NEXT: shll %cl, %edx
+; X86-NOBMI2-NEXT: movzwl (%esi), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB40_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ecx
+; X86-NOBMI2-NEXT: orl %edx, %ecx
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi)
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT: jne .LBB40_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movzwl %ax, %ecx
+; X86-NOBMI2-NEXT: xorl %eax, %eax
+; X86-NOBMI2-NEXT: testl %ecx, %edx
+; X86-NOBMI2-NEXT: setne %al
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_or_16_gpr_valnz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: .cfi_offset %esi, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl $1, %edx
+; X86-BMI2-NEXT: shlxl %eax, %edx, %edx
+; X86-BMI2-NEXT: movzwl (%ecx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB40_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %esi
+; X86-BMI2-NEXT: orl %edx, %esi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %si, (%ecx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB40_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %ecx
+; X86-BMI2-NEXT: xorl %eax, %eax
+; X86-BMI2-NEXT: testl %ecx, %edx
+; X86-BMI2-NEXT: setne %al
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_or_16_gpr_valnz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT: shll %cl, %edx
+; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB40_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: orl %edx, %ecx
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi)
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NOBMI2-NEXT: jne .LBB40_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movzwl %ax, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: testl %ecx, %edx
+; X64-NOBMI2-NEXT: setne %al
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_or_16_gpr_valnz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl $1, %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB40_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB40_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %edx
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: testl %edx, %ecx
+; X64-BMI2-NEXT: setne %al
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: retq
+entry:
+  %conv = zext i16 %c to i32
+  %shl = shl nuw i32 1, %conv
+  %conv1 = trunc i32 %shl to i16
+  %0 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2
+  %conv2 = zext i16 %0 to i32
+  %and = and i32 %shl, %conv2
+  %tobool = icmp ne i32 %and, 0
+  %conv6 = zext i1 %tobool to i16
+  ret i16 %conv6
+}
+
+define i16 @atomic_shl1_mask0_or_16_gpr_valnz(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_16_gpr_valnz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -8
+; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl %edx, %ecx
+; X86-NOBMI2-NEXT: andb $15, %cl
+; X86-NOBMI2-NEXT: movl $1, %edi
+; X86-NOBMI2-NEXT: shll %cl, %edi
+; X86-NOBMI2-NEXT: movzwl (%esi), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB41_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ecx
+; X86-NOBMI2-NEXT: orl %edi, %ecx
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi)
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT: jne .LBB41_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movzwl %ax, %ecx
+; X86-NOBMI2-NEXT: xorl %eax, %eax
+; X86-NOBMI2-NEXT: btl %edx, %ecx
+; X86-NOBMI2-NEXT: setb %al
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask0_or_16_gpr_valnz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: .cfi_offset %esi, -12
+; X86-BMI2-NEXT: .cfi_offset %edi, -8
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movl %ecx, %eax
+; X86-BMI2-NEXT: andb $15, %al
+; X86-BMI2-NEXT: movl $1, %esi
+; X86-BMI2-NEXT: shlxl %eax, %esi, %esi
+; X86-BMI2-NEXT: movzwl (%edx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB41_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edi
+; X86-BMI2-NEXT: orl %esi, %edi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB41_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %edx
+; X86-BMI2-NEXT: xorl %eax, %eax
+; X86-BMI2-NEXT: btl %ecx, %edx
+; X86-BMI2-NEXT: setb %al
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_16_gpr_valnz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: andb $15, %cl
+; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: shll %cl, %edx
+; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB41_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: orl %edx, %ecx
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi)
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NOBMI2-NEXT: jne .LBB41_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movzwl %ax, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: btl %esi, %ecx
+; X64-NOBMI2-NEXT: setb %al
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_mask0_or_16_gpr_valnz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andb $15, %al
+; X64-BMI2-NEXT: movl $1, %ecx
+; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB41_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB41_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %ecx
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: btl %esi, %ecx
+; X64-BMI2-NEXT: setb %al
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: retq
+entry:
+  %0 = and i16 %c, 15
+  %shl = shl nuw i16 1, %0
+  %1 = atomicrmw or ptr %v, i16 %shl monotonic, align 2
+  %conv2 = zext i16 %1 to i32
+  %conv3 = zext i16 %c to i32
+  %shl4 = shl nuw i32 1, %conv3
+  %and = and i32 %shl4, %conv2
+  %tobool = icmp ne i32 %and, 0
+  %conv6 = zext i1 %tobool to i16
+  ret i16 %conv6
+}
+
+define i16 @atomic_shl1_mask1_or_16_gpr_valnz(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_16_gpr_valnz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -8
+; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movl $1, %esi
+; X86-NOBMI2-NEXT: shll %cl, %esi
+; X86-NOBMI2-NEXT: movzwl (%edx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB42_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %edi
+; X86-NOBMI2-NEXT: orl %esi, %edi
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx)
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT: jne .LBB42_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movzwl %ax, %edx
+; X86-NOBMI2-NEXT: andl $15, %ecx
+; X86-NOBMI2-NEXT: xorl %eax, %eax
+; X86-NOBMI2-NEXT: btl %ecx, %edx
+; X86-NOBMI2-NEXT: setb %al
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask1_or_16_gpr_valnz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: .cfi_offset %esi, -12
+; X86-BMI2-NEXT: .cfi_offset %edi, -8
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movl $1, %eax
+; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi
+; X86-BMI2-NEXT: movzwl (%edx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB42_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edi
+; X86-BMI2-NEXT: orl %esi, %edi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB42_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %edx
+; X86-BMI2-NEXT: andl $15, %ecx
+; X86-BMI2-NEXT: xorl %eax, %eax
+; X86-BMI2-NEXT: btl %ecx, %edx
+; X86-BMI2-NEXT: setb %al
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_16_gpr_valnz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: shll %cl, %edx
+; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB42_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %esi
+; X64-NOBMI2-NEXT: orl %edx, %esi
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi)
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NOBMI2-NEXT: jne .LBB42_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movzwl %ax, %edx
+; X64-NOBMI2-NEXT: andl $15, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: btl %ecx, %edx
+; X64-NOBMI2-NEXT: setb %al
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_mask1_or_16_gpr_valnz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl $1, %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB42_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB42_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %ecx
+; X64-BMI2-NEXT: andl $15, %esi
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: btl %esi, %ecx
+; X64-BMI2-NEXT: setb %al
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: retq
+entry:
+  %conv = zext i16 %c to i32
+  %shl = shl nuw i32 1, %conv
+  %conv1 = trunc i32 %shl to i16
+  %0 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2
+  %conv2 = zext i16 %0 to i32
+  %1 = and i16 %c, 15
+  %sh_prom = zext i16 %1 to i32
+  %shl4 = shl nuw nsw i32 1, %sh_prom
+  %and = and i32 %shl4, %conv2
+  %tobool = icmp ne i32 %and, 0
+  %conv6 = zext i1 %tobool to i16
+  ret i16 %conv6
+}
+
+define i16 @atomic_shl1_mask01_or_16_gpr_valnz(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_16_gpr_valnz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: andb $15, %cl
+; X86-NOBMI2-NEXT: movl $1, %esi
+; X86-NOBMI2-NEXT: shll %cl, %esi
+; X86-NOBMI2-NEXT: movzwl (%edx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB43_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ecx
+; X86-NOBMI2-NEXT: orl %esi, %ecx
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx)
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT: jne .LBB43_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movzwl %ax, %ecx
+; X86-NOBMI2-NEXT: xorl %eax, %eax
+; X86-NOBMI2-NEXT: testl %ecx, %esi
+; X86-NOBMI2-NEXT: setne %al
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask01_or_16_gpr_valnz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: .cfi_offset %esi, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andb $15, %al
+; X86-BMI2-NEXT: movl $1, %edx
+; X86-BMI2-NEXT: shlxl %eax, %edx, %edx
+; X86-BMI2-NEXT: movzwl (%ecx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB43_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %esi
+; X86-BMI2-NEXT: orl %edx, %esi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %si, (%ecx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB43_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %ecx
+; X86-BMI2-NEXT: xorl %eax, %eax
+; X86-BMI2-NEXT: testl %ecx, %edx
+; X86-BMI2-NEXT: setne %al
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_16_gpr_valnz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: andb $15, %cl
+; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT: shll %cl, %edx
+; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB43_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: orl %edx, %ecx
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi)
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NOBMI2-NEXT: jne .LBB43_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movzwl %ax, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: testl %ecx, %edx
+; X64-NOBMI2-NEXT: setne %al
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_mask01_or_16_gpr_valnz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: andb $15, %sil
+; X64-BMI2-NEXT: movl $1, %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB43_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB43_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %edx
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: testl %edx, %ecx
+; X64-BMI2-NEXT: setne %al
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: retq
+entry:
+  %0 = and i16 %c, 15
+  %sh_prom = zext i16 %0 to i32
+  %shl = shl nuw nsw i32 1, %sh_prom
+  %conv1 = trunc i32 %shl to i16
+  %1 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2
+  %conv2 = zext i16 %1 to i32
+  %and = and i32 %shl, %conv2
+  %tobool = icmp ne i32 %and, 0
+  %conv8 = zext i1 %tobool to i16
+  ret i16 %conv8
+}
+
+define i16 @atomic_blsi_or_16_gpr_valnz(ptr %v, i16 %c) {
+; X86-BMI2-LABEL: atomic_blsi_or_16_gpr_valnz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: .cfi_offset %esi, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: blsil %eax, %ecx
+; X86-BMI2-NEXT: movzwl (%edx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB44_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %esi
+; X86-BMI2-NEXT: orl %ecx, %esi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %si, (%edx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB44_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %edx
+; X86-BMI2-NEXT: xorl %eax, %eax
+; X86-BMI2-NEXT: testl %edx, %ecx
+; X86-BMI2-NEXT: setne %al
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-BMI2-LABEL: atomic_blsi_or_16_gpr_valnz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movzwl %si, %eax
+; X64-BMI2-NEXT: blsil %eax, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB44_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB44_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %edx
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: testl %edx, %ecx
+; X64-BMI2-NEXT: setne %al
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: retq
+entry:
+  %conv = zext i16 %c to i32
+  %sub = sub nsw i32 0, %conv
+  %and = and i32 %conv, %sub
+  %conv2 = trunc i32 %and to i16
+  %0 = atomicrmw or ptr %v, i16 %conv2 monotonic, align 2
+  %conv3 = zext i16 %0 to i32
+  %and8 = and i32 %and, %conv3
+  %tobool = icmp ne i32 %and8, 0
+  %conv10 = zext i1 %tobool to i16
+  ret i16 %conv10
+}
+
+define i16 @atomic_shl1_or_16_gpr_br(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_or_16_gpr_br:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -8
+; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movl $1, %esi
+; X86-NOBMI2-NEXT: shll %cl, %esi
+; X86-NOBMI2-NEXT: movzwl (%edx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB45_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %edi
+; X86-NOBMI2-NEXT: orl %esi, %edi
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx)
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT: jne .LBB45_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movzwl %ax, %eax
+; X86-NOBMI2-NEXT: testl %eax, %esi
+; X86-NOBMI2-NEXT: je .LBB45_3
+; X86-NOBMI2-NEXT: # %bb.4: # %if.then
+; X86-NOBMI2-NEXT: movzwl %cx, %eax
+; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax
+; X86-NOBMI2-NEXT: jmp .LBB45_5
+; X86-NOBMI2-NEXT: .LBB45_3:
+; X86-NOBMI2-NEXT: movw $123, %ax
+; X86-NOBMI2-NEXT: .LBB45_5: # %return
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_or_16_gpr_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: .cfi_offset %esi, -12
+; X86-BMI2-NEXT: .cfi_offset %edi, -8
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl $1, %eax
+; X86-BMI2-NEXT: shlxl %edx, %eax, %esi
+; X86-BMI2-NEXT: movzwl (%ecx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB45_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edi
+; X86-BMI2-NEXT: orl %esi, %edi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB45_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %eax
+; X86-BMI2-NEXT: testl %eax, %esi
+; X86-BMI2-NEXT: je .LBB45_3
+; X86-BMI2-NEXT: # %bb.4: # %if.then
+; X86-BMI2-NEXT: movzwl %dx, %eax
+; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax
+; X86-BMI2-NEXT: jmp .LBB45_5
+; X86-BMI2-NEXT: .LBB45_3:
+; X86-BMI2-NEXT: movw $123, %ax
+; X86-BMI2-NEXT: .LBB45_5: # %return
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_or_16_gpr_br:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: shll %cl, %edx
+; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB45_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %esi
+; X64-NOBMI2-NEXT: orl %edx, %esi
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi)
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NOBMI2-NEXT: jne .LBB45_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movzwl %ax, %eax
+; X64-NOBMI2-NEXT: testl %eax, %edx
+; X64-NOBMI2-NEXT: je .LBB45_3
+; X64-NOBMI2-NEXT: # %bb.4: # %if.then
+; X64-NOBMI2-NEXT: movzwl %cx, %eax
+; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB45_3:
+; X64-NOBMI2-NEXT: movw $123, %ax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_or_16_gpr_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl $1, %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB45_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB45_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %eax
+; X64-BMI2-NEXT: testl %eax, %ecx
+; X64-BMI2-NEXT: je .LBB45_3
+; X64-BMI2-NEXT: # %bb.4: # %if.then
+; X64-BMI2-NEXT: movzwl %si, %eax
+; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB45_3:
+; X64-BMI2-NEXT: movw $123, %ax
+; X64-BMI2-NEXT: retq
+entry:
+  %conv = zext i16 %c to i32
+  %shl = shl nuw i32 1, %conv
+  %conv1 = trunc i32 %shl to i16
+  %0 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2
+  %conv2 = zext i16 %0 to i32
+  %and = and i32 %shl, %conv2
+  %tobool.not = icmp eq i32 %and, 0
+  br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+  %idxprom = zext i16 %c to i64
+  %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom
+  %1 = load i16, ptr %arrayidx, align 2
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ]
+  ret i16 %retval.0
+}
+
+define i16 @atomic_shl1_mask0_or_16_gpr_br(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_16_gpr_br:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movl %ebx, %ecx
+; X86-NOBMI2-NEXT: andb $15, %cl
+; X86-NOBMI2-NEXT: movl $1, %esi
+; X86-NOBMI2-NEXT: shll %cl, %esi
+; X86-NOBMI2-NEXT: movzwl (%edx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB46_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ecx
+; X86-NOBMI2-NEXT: orl %esi, %ecx
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx)
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT: jne .LBB46_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movzwl %ax, %eax
+; X86-NOBMI2-NEXT: btl %ebx, %eax
+; X86-NOBMI2-NEXT: jae .LBB46_3
+; X86-NOBMI2-NEXT: # %bb.4: # %if.then
+; X86-NOBMI2-NEXT: movzwl %bx, %eax
+; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax
+; X86-NOBMI2-NEXT: jmp .LBB46_5
+; X86-NOBMI2-NEXT: .LBB46_3:
+; X86-NOBMI2-NEXT: movw $123, %ax
+; X86-NOBMI2-NEXT: .LBB46_5: # %return
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask0_or_16_gpr_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: .cfi_offset %esi, -12
+; X86-BMI2-NEXT: .cfi_offset %edi, -8
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl %edx, %eax
+; X86-BMI2-NEXT: andb $15, %al
+; X86-BMI2-NEXT: movl $1, %esi
+; X86-BMI2-NEXT: shlxl %eax, %esi, %esi
+; X86-BMI2-NEXT: movzwl (%ecx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB46_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edi
+; X86-BMI2-NEXT: orl %esi, %edi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB46_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %eax
+; X86-BMI2-NEXT: btl %edx, %eax
+; X86-BMI2-NEXT: jae .LBB46_3
+; X86-BMI2-NEXT: # %bb.4: # %if.then
+; X86-BMI2-NEXT: movzwl %dx, %eax
+; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax
+; X86-BMI2-NEXT: jmp .LBB46_5
+; X86-BMI2-NEXT: .LBB46_3:
+; X86-BMI2-NEXT: movw $123, %ax
+; X86-BMI2-NEXT: .LBB46_5: # %return
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_16_gpr_br:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: andb $15, %cl
+; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: shll %cl, %edx
+; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB46_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: orl %edx, %ecx
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi)
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NOBMI2-NEXT: jne .LBB46_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movzwl %ax, %eax
+; X64-NOBMI2-NEXT: btl %esi, %eax
+; X64-NOBMI2-NEXT: jae .LBB46_3
+; X64-NOBMI2-NEXT: # %bb.4: # %if.then
+; X64-NOBMI2-NEXT: movzwl %si, %eax
+; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB46_3:
+; X64-NOBMI2-NEXT: movw $123, %ax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_mask0_or_16_gpr_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andb $15, %al
+; X64-BMI2-NEXT: movl $1, %ecx
+; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB46_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB46_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %eax
+; X64-BMI2-NEXT: btl %esi, %eax
+; X64-BMI2-NEXT: jae .LBB46_3
+; X64-BMI2-NEXT: # %bb.4: # %if.then
+; X64-BMI2-NEXT: movzwl %si, %eax
+; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB46_3:
+; X64-BMI2-NEXT: movw $123, %ax
+; X64-BMI2-NEXT: retq
+entry:
+  %0 = and i16 %c, 15
+  %shl = shl nuw i16 1, %0
+  %1 = atomicrmw or ptr %v, i16 %shl monotonic, align 2
+  %conv2 = zext i16 %1 to i32
+  %conv3 = zext i16 %c to i32
+  %shl4 = shl nuw i32 1, %conv3
+  %and = and i32 %shl4, %conv2
+  %tobool.not = icmp eq i32 %and, 0
+  br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+  %conv = zext i16 %c to i64
+  %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv
+  %2 = load i16, ptr %arrayidx, align 2
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ]
+  ret i16 %retval.0
+}
+
+define i16 @atomic_shl1_mask1_or_16_gpr_br(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_16_gpr_br:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -8
+; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movl $1, %esi
+; X86-NOBMI2-NEXT: shll %cl, %esi
+; X86-NOBMI2-NEXT: movzwl (%edx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB47_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %edi
+; X86-NOBMI2-NEXT: orl %esi, %edi
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx)
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT: jne .LBB47_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movzwl %ax, %eax
+; X86-NOBMI2-NEXT: movl %ecx, %esi
+; X86-NOBMI2-NEXT: andl $15, %esi
+; X86-NOBMI2-NEXT: btl %esi, %eax
+; X86-NOBMI2-NEXT: jae .LBB47_3
+; X86-NOBMI2-NEXT: # %bb.4: # %if.then
+; X86-NOBMI2-NEXT: movzwl %cx, %eax
+; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax
+; X86-NOBMI2-NEXT: jmp .LBB47_5
+; X86-NOBMI2-NEXT: .LBB47_3:
+; X86-NOBMI2-NEXT: movw $123, %ax
+; X86-NOBMI2-NEXT: .LBB47_5: # %return
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask1_or_16_gpr_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: .cfi_offset %esi, -12
+; X86-BMI2-NEXT: .cfi_offset %edi, -8
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl $1, %eax
+; X86-BMI2-NEXT: shlxl %edx, %eax, %esi
+; X86-BMI2-NEXT: movzwl (%ecx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB47_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edi
+; X86-BMI2-NEXT: orl %esi, %edi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB47_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %eax
+; X86-BMI2-NEXT: movl %edx, %esi
+; X86-BMI2-NEXT: andl $15, %esi
+; X86-BMI2-NEXT: btl %esi, %eax
+; X86-BMI2-NEXT: jae .LBB47_3
+; X86-BMI2-NEXT: # %bb.4: # %if.then
+; X86-BMI2-NEXT: movzwl %dx, %eax
+; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax
+; X86-BMI2-NEXT: jmp .LBB47_5
+; X86-BMI2-NEXT: .LBB47_3:
+; X86-BMI2-NEXT: movw $123, %ax
+; X86-BMI2-NEXT: .LBB47_5: # %return
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_16_gpr_br:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: shll %cl, %edx
+; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB47_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %esi
+; X64-NOBMI2-NEXT: orl %edx, %esi
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi)
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NOBMI2-NEXT: jne .LBB47_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movzwl %ax, %eax
+; X64-NOBMI2-NEXT: movl %ecx, %edx
+; X64-NOBMI2-NEXT: andl $15, %edx
+; X64-NOBMI2-NEXT: btl %edx, %eax
+; X64-NOBMI2-NEXT: jae .LBB47_3
+; X64-NOBMI2-NEXT: # %bb.4: # %if.then
+; X64-NOBMI2-NEXT: movzwl %cx, %eax
+; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB47_3:
+; X64-NOBMI2-NEXT: movw $123, %ax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_mask1_or_16_gpr_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl $1, %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB47_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB47_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %eax
+; X64-BMI2-NEXT: movl %esi, %ecx
+; X64-BMI2-NEXT: andl $15, %ecx
+; X64-BMI2-NEXT: btl %ecx, %eax
+; X64-BMI2-NEXT: jae .LBB47_3
+; X64-BMI2-NEXT: # %bb.4: # %if.then
+; X64-BMI2-NEXT: movzwl %si, %eax
+; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB47_3:
+; X64-BMI2-NEXT: movw $123, %ax
+; X64-BMI2-NEXT: retq
+entry:
+  %conv = zext i16 %c to i32
+  %shl = shl nuw i32 1, %conv
+  %conv1 = trunc i32 %shl to i16
+  %0 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2
+  %conv2 = zext i16 %0 to i32
+  %1 = and i16 %c, 15
+  %sh_prom = zext i16 %1 to i32
+  %shl4 = shl nuw nsw i32 1, %sh_prom
+  %and = and i32 %shl4, %conv2
+  %tobool.not = icmp eq i32 %and, 0
+  br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+  %conv3 = zext i16 %c to i64
+  %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv3
+  %2 = load i16, ptr %arrayidx, align 2
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ]
+  ret i16 %retval.0
+}
+
+define i16 @atomic_shl1_mask01_or_16_gpr_br(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_16_gpr_br:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movl %ebx, %ecx
+; X86-NOBMI2-NEXT: andb $15, %cl
+; X86-NOBMI2-NEXT: movl $1, %esi
+; X86-NOBMI2-NEXT: shll %cl, %esi
+; X86-NOBMI2-NEXT: movzwl (%edx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB48_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ecx
+; X86-NOBMI2-NEXT: orl %esi, %ecx
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx)
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT: jne .LBB48_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movzwl %ax, %eax
+; X86-NOBMI2-NEXT: testl %eax, %esi
+; X86-NOBMI2-NEXT: je .LBB48_3
+; X86-NOBMI2-NEXT: # %bb.4: # %if.then
+; X86-NOBMI2-NEXT: movzwl %bx, %eax
+; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax
+; X86-NOBMI2-NEXT: jmp .LBB48_5
+; X86-NOBMI2-NEXT: .LBB48_3:
+; X86-NOBMI2-NEXT: movw $123, %ax
+; X86-NOBMI2-NEXT: .LBB48_5: # %return
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask01_or_16_gpr_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: .cfi_offset %esi, -12
+; X86-BMI2-NEXT: .cfi_offset %edi, -8
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl %edx, %eax
+; X86-BMI2-NEXT: andb $15, %al
+; X86-BMI2-NEXT: movl $1, %esi
+; X86-BMI2-NEXT: shlxl %eax, %esi, %esi
+; X86-BMI2-NEXT: movzwl (%ecx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB48_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edi
+; X86-BMI2-NEXT: orl %esi, %edi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB48_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %eax
+; X86-BMI2-NEXT: testl %eax, %esi
+; X86-BMI2-NEXT: je .LBB48_3
+; X86-BMI2-NEXT: # %bb.4: # %if.then
+; X86-BMI2-NEXT: movzwl %dx, %eax
+; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax
+; X86-BMI2-NEXT: jmp .LBB48_5
+; X86-BMI2-NEXT: .LBB48_3:
+; X86-BMI2-NEXT: movw $123, %ax
+; X86-BMI2-NEXT: .LBB48_5: # %return
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_16_gpr_br:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: andb $15, %cl
+; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: shll %cl, %edx
+; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB48_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: orl %edx, %ecx
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi)
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NOBMI2-NEXT: jne .LBB48_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movzwl %ax, %eax
+; X64-NOBMI2-NEXT: testl %eax, %edx
+; X64-NOBMI2-NEXT: je .LBB48_3
+; X64-NOBMI2-NEXT: # %bb.4: # %if.then
+; X64-NOBMI2-NEXT: movzwl %si, %eax
+; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB48_3:
+; X64-NOBMI2-NEXT: movw $123, %ax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_mask01_or_16_gpr_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andb $15, %al
+; X64-BMI2-NEXT: movl $1, %ecx
+; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB48_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB48_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %eax
+; X64-BMI2-NEXT: testl %eax, %ecx
+; X64-BMI2-NEXT: je .LBB48_3
+; X64-BMI2-NEXT: # %bb.4: # %if.then
+; X64-BMI2-NEXT: movzwl %si, %eax
+; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB48_3:
+; X64-BMI2-NEXT: movw $123, %ax
+; X64-BMI2-NEXT: retq
+entry:
+  %0 = and i16 %c, 15
+  %sh_prom = zext i16 %0 to i32
+  %shl = shl nuw nsw i32 1, %sh_prom
+  %conv1 = trunc i32 %shl to i16
+  %1 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2
+  %conv2 = zext i16 %1 to i32
+  %and = and i32 %shl, %conv2
+  %tobool.not = icmp eq i32 %and, 0
+  br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+  %conv = zext i16 %c to i64
+  %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv
+  %2 = load i16, ptr %arrayidx, align 2
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ]
+  ret i16 %retval.0
+}
+
+define i16 @atomic_blsi_or_16_gpr_br(ptr %v, i16 %c) {
+; X86-BMI2-LABEL: atomic_blsi_or_16_gpr_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: .cfi_offset %esi, -12
+; X86-BMI2-NEXT: .cfi_offset %edi, -8
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: blsil %ecx, %esi
+; X86-BMI2-NEXT: movzwl (%edx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB49_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edi
+; X86-BMI2-NEXT: orl %esi, %edi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB49_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %eax
+; X86-BMI2-NEXT: testl %eax, %esi
+; X86-BMI2-NEXT: je .LBB49_3
+; X86-BMI2-NEXT: # %bb.4: # %if.then
+; X86-BMI2-NEXT: movzwl %cx, %eax
+; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax
+; X86-BMI2-NEXT: jmp .LBB49_5
+; X86-BMI2-NEXT: .LBB49_3:
+; X86-BMI2-NEXT: movw $123, %ax
+; X86-BMI2-NEXT: .LBB49_5: # %return
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-BMI2-LABEL: atomic_blsi_or_16_gpr_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movzwl %si, %ecx
+; X64-BMI2-NEXT: blsil %ecx, %edx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB49_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %esi
+; X64-BMI2-NEXT: orl %edx, %esi
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB49_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %eax
+; X64-BMI2-NEXT: testl %eax, %edx
+; X64-BMI2-NEXT: je .LBB49_3
+; X64-BMI2-NEXT: # %bb.4: # %if.then
+; X64-BMI2-NEXT: movzwl (%rdi,%rcx,2), %eax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB49_3:
+; X64-BMI2-NEXT: movw $123, %ax
+; X64-BMI2-NEXT: retq
+entry:
+  %conv = zext i16 %c to i32
+  %sub = sub nsw i32 0, %conv
+  %and = and i32 %conv, %sub
+  %conv2 = trunc i32 %and to i16
+  %0 = atomicrmw or ptr %v, i16 %conv2 monotonic, align 2
+  %conv3 = zext i16 %0 to i32
+  %and8 = and i32 %and, %conv3
+  %tobool.not = icmp eq i32 %and8, 0
+  br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+  %idxprom = zext i16 %c to i64
+  %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom
+  %1 = load i16, ptr %arrayidx, align 2
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ]
+  ret i16 %retval.0
+}
+
+define i16 @atomic_shl1_or_16_gpr_brz(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_or_16_gpr_brz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -8
+; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movl $1, %esi
+; X86-NOBMI2-NEXT: shll %cl, %esi
+; X86-NOBMI2-NEXT: movzwl (%edx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB50_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %edi
+; X86-NOBMI2-NEXT: orl %esi, %edi
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx)
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT: jne .LBB50_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movzwl %ax, %edi
+; X86-NOBMI2-NEXT: movw $123, %ax
+; X86-NOBMI2-NEXT: testl %edi, %esi
+; X86-NOBMI2-NEXT: jne .LBB50_4
+; X86-NOBMI2-NEXT: # %bb.3: # %if.then
+; X86-NOBMI2-NEXT: movzwl %cx, %eax
+; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax
+; X86-NOBMI2-NEXT: .LBB50_4: # %return
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_or_16_gpr_brz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: .cfi_offset %esi, -12
+; X86-BMI2-NEXT: .cfi_offset %edi, -8
+; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl $1, %eax
+; X86-BMI2-NEXT: shlxl %edx, %eax, %esi
+; X86-BMI2-NEXT: movzwl (%ecx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB50_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edi
+; X86-BMI2-NEXT: orl %esi, %edi
+; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx)
+; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-BMI2-NEXT: jne .LBB50_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movzwl %ax, %edi
+; X86-BMI2-NEXT: movw $123, %ax
+; X86-BMI2-NEXT: testl %edi, %esi
+; X86-BMI2-NEXT: jne .LBB50_4
+; X86-BMI2-NEXT: # %bb.3: # %if.then
+; X86-BMI2-NEXT: movzwl %dx, %eax
+; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax
+; X86-BMI2-NEXT: .LBB50_4: # %return
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_or_16_gpr_brz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: shll %cl, %edx
+; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB50_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %esi
+; X64-NOBMI2-NEXT: orl %edx, %esi
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi)
+; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NOBMI2-NEXT: jne .LBB50_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movzwl %ax, %esi
+; X64-NOBMI2-NEXT: movw $123, %ax
+; X64-NOBMI2-NEXT: testl %esi, %edx
+; X64-NOBMI2-NEXT: je .LBB50_3
+; X64-NOBMI2-NEXT: # %bb.4: # %return
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB50_3: # %if.then
+; X64-NOBMI2-NEXT: movzwl %cx, %eax
+; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_or_16_gpr_brz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl $1, %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx
+; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB50_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %edx
+; X64-BMI2-NEXT: orl %ecx, %edx
+; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi)
+; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X64-BMI2-NEXT: jne .LBB50_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movzwl %ax, %edx
+; X64-BMI2-NEXT: movw $123, %ax
+; X64-BMI2-NEXT: testl %edx, %ecx
+; X64-BMI2-NEXT: je .LBB50_3
+; X64-BMI2-NEXT: # %bb.4: # %return
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB50_3: # %if.then
+; X64-BMI2-NEXT: movzwl %si, %eax
+; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax
+; X64-BMI2-NEXT: retq
+entry:
+  %conv = zext i16 %c to i32
+  %shl = shl nuw i32 1, %conv
+  %conv1 = trunc i32 %shl to i16
+  %0 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2
+  %conv2 = zext i16 %0 to i32
+  %and = and i32 %shl, %conv2
+  %tobool.not = icmp eq i32 %and, 0
+  br i1 %tobool.not, label %if.then, label %return
+
+if.then: ; preds = %entry
+  %idxprom = zext i16 %c to i64
+  %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom
+  %1 = load i16, ptr %arrayidx, align 2
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ]
+  ret i16 %retval.0
+}
+
+define i16 @atomic_shl1_mask0_or_16_gpr_brz(ptr %v, i16 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_16_gpr_brz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movl %ebx, %ecx
+; X86-NOBMI2-NEXT: andb $15, %cl
+; X86-NOBMI2-NEXT: movl $1, %esi
+; X86-NOBMI2-NEXT: shll %cl, %esi
+; X86-NOBMI2-NEXT: movzwl (%edx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB51_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ecx
+; X86-NOBMI2-NEXT: orl %esi, %ecx
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx)
+; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NOBMI2-NEXT: jne .LBB51_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movzwl %ax, %ecx
+; X86-NOBMI2-NEXT: movw $123, %ax
+; X86-NOBMI2-NEXT: btl %ebx, %ecx
+; X86-NOBMI2-NEXT: jb .LBB51_4
+; X86-NOBMI2-NEXT: # %bb.3: # %if.then
+; X86-NOBMI2-NEXT: movzwl %bx, %eax
+; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax
+; X86-NOBMI2-NEXT: .LBB51_4: # %return
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask0_or_16_gpr_brz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 
12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB51_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB51_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %esi +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: btl %edx, %esi +; X86-BMI2-NEXT: jb .LBB51_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB51_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_16_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB51_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB51_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: btl %esi, %ecx +; X64-NOBMI2-NEXT: jae .LBB51_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB51_3: # %if.then +; X64-NOBMI2-NEXT: movzwl %si, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB51_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB51_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %ecx +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: btl %esi, %ecx +; X64-BMI2-NEXT: jae .LBB51_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB51_3: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw or ptr %v, i16 %shl monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = 
and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask1_or_16_gpr_brz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_16_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB52_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB52_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %esi +; X86-NOBMI2-NEXT: movl %ecx, %edi +; X86-NOBMI2-NEXT: andl $15, %edi +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: btl %edi, %esi +; X86-NOBMI2-NEXT: jb .LBB52_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: .LBB52_4: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_16_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB52_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB52_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %esi +; X86-BMI2-NEXT: movl %edx, %edi +; X86-BMI2-NEXT: andl $15, %edi +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: btl %edi, %esi +; X86-BMI2-NEXT: jb .LBB52_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB52_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_16_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: 
movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB52_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB52_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %edx +; X64-NOBMI2-NEXT: movl %ecx, %esi +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: btl %esi, %edx +; X64-NOBMI2-NEXT: jae .LBB52_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB52_3: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB52_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB52_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %ecx +; X64-BMI2-NEXT: movl %esi, %edx +; X64-BMI2-NEXT: andl $15, %edx +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: btl %edx, %ecx +; X64-BMI2-NEXT: jae .LBB52_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB52_3: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %1 = and i16 %c, 15 + %sh_prom = zext i16 %1 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv3 = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv3 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask01_or_16_gpr_brz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_16_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ebx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB53_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: 
movl %eax, %ecx +; X86-NOBMI2-NEXT: orl %esi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB53_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: testl %ecx, %esi +; X86-NOBMI2-NEXT: jne .LBB53_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movzwl %bx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: .LBB53_4: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_16_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB53_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB53_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edi +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: testl %edi, %esi +; X86-BMI2-NEXT: jne .LBB53_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB53_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_16_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB53_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB53_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: testl %ecx, %edx +; X64-NOBMI2-NEXT: je .LBB53_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB53_3: # %if.then +; X64-NOBMI2-NEXT: movzwl %si, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl 
(%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB53_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB53_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: testl %edx, %ecx +; X64-BMI2-NEXT: je .LBB53_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB53_3: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %conv1 = trunc i32 %shl to i16 + %1 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_blsi_or_16_gpr_brz(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_or_16_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB54_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB54_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edi +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: testl %edi, %esi +; X86-BMI2-NEXT: jne .LBB54_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB54_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl %si, %ecx +; X64-BMI2-NEXT: blsil %ecx, %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB54_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: orl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB54_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %esi +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: testl %esi, %edx +; 
X64-BMI2-NEXT: je .LBB54_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB54_3: # %if.then +; X64-BMI2-NEXT: movzwl (%rdi,%rcx,2), %eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %conv2 = trunc i32 %and to i16 + %0 = atomicrmw or ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %0 to i32 + %and8 = and i32 %and, %conv3 + %tobool.not = icmp eq i32 %and8, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %1 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_or_16_gpr_brnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_16_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB55_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB55_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: testl %eax, %esi +; X86-NOBMI2-NEXT: je .LBB55_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB55_5 +; X86-NOBMI2-NEXT: .LBB55_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB55_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB55_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB55_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB55_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax 
+; X86-BMI2-NEXT: jmp .LBB55_5 +; X86-BMI2-NEXT: .LBB55_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB55_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_16_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB55_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB55_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: testl %eax, %edx +; X64-NOBMI2-NEXT: je .LBB55_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB55_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB55_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB55_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %ecx +; X64-BMI2-NEXT: je .LBB55_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB55_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %1 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask0_or_16_gpr_brnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_16_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ebx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: 
.p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB56_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: orl %esi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB56_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: btl %ebx, %eax +; X86-NOBMI2-NEXT: jae .LBB56_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %bx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB56_5 +; X86-NOBMI2-NEXT: .LBB56_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB56_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB56_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB56_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: jae .LBB56_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB56_5 +; X86-BMI2-NEXT: .LBB56_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB56_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_16_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB56_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB56_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: btl %esi, %eax +; X64-NOBMI2-NEXT: jae .LBB56_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %si, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB56_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; 
+; X64-BMI2-LABEL: atomic_shl1_mask0_or_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB56_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB56_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: jae .LBB56_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB56_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %1 = atomicrmw or ptr %v, i16 %shl monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask1_or_16_gpr_brnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_16_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB57_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB57_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: movl %ecx, %esi +; X86-NOBMI2-NEXT: andl $15, %esi +; X86-NOBMI2-NEXT: btl %esi, %eax +; X86-NOBMI2-NEXT: jae .LBB57_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB57_5 +; X86-NOBMI2-NEXT: .LBB57_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB57_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; 
X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB57_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB57_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: movl %edx, %esi +; X86-BMI2-NEXT: andl $15, %esi +; X86-BMI2-NEXT: btl %esi, %eax +; X86-BMI2-NEXT: jae .LBB57_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB57_5 +; X86-BMI2-NEXT: .LBB57_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB57_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_16_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB57_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB57_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: movl %ecx, %edx +; X64-NOBMI2-NEXT: andl $15, %edx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: jae .LBB57_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB57_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB57_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB57_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: andl $15, %ecx +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: jae .LBB57_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB57_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %conv1 = trunc i32 %shl to i16 + %0 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %0 
to i32 + %1 = and i16 %c, 15 + %sh_prom = zext i16 %1 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv3 = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv3 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask01_or_16_gpr_brnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_16_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ebx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB58_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: orl %esi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB58_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: testl %eax, %esi +; X86-NOBMI2-NEXT: je .LBB58_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %bx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB58_5 +; X86-NOBMI2-NEXT: .LBB58_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB58_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: andb $15, %al +; X86-BMI2-NEXT: movl $1, %esi +; X86-BMI2-NEXT: shlxl %eax, %esi, %esi +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB58_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB58_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB58_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %dx, %eax +; X86-BMI2-NEXT: movzwl (%ecx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB58_5 +; X86-BMI2-NEXT: .LBB58_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB58_5: # %return +; X86-BMI2-NEXT: popl %esi +; 
X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_16_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB58_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl %edx, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB58_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: testl %eax, %edx +; X64-NOBMI2-NEXT: je .LBB58_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %si, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB58_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andb $15, %al +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %eax, %ecx, %ecx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB58_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB58_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %ecx +; X64-BMI2-NEXT: je .LBB58_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB58_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %conv1 = trunc i32 %shl to i16 + %1 = atomicrmw or ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_blsi_or_16_gpr_brnz(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_or_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB59_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; 
X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB59_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB59_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB59_5 +; X86-BMI2-NEXT: .LBB59_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB59_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl %si, %ecx +; X64-BMI2-NEXT: blsil %ecx, %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB59_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: orl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB59_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %edx +; X64-BMI2-NEXT: je .LBB59_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl (%rdi,%rcx,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB59_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %conv2 = trunc i32 %and to i16 + %0 = atomicrmw or ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %0 to i32 + %and8 = and i32 %and, %conv3 + %tobool.not = icmp eq i32 %and8, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %1 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %1, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_and_16_gpr_val(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_16_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl $-2, %edi +; X86-NOBMI2-NEXT: roll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB60_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: andl %edi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB60_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %edx +; X86-BMI2-NEXT: movl $-2, %edi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %edi +; X86-BMI2-NEXT: movzwl (%esi), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB60_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %cx, (%esi) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB60_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_16_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl $-2, %esi +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: roll %cl, %esi +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB60_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl %esi, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB60_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl $-2, %esi +; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI2-NEXT: roll %cl, %esi +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB60_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl %esi, %ecx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB60_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl %edx, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv5 = and i16 %1, %0 + ret i16 %conv5 +} + +define i16 
@atomic_shl1_mask0_and_16_gpr_val(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_16_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movw $-2, %si +; X86-NOBMI2-NEXT: rolw %cl, %si +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB61_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB61_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $cx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movw $-2, %si +; X86-BMI2-NEXT: rolw %cl, %si +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB61_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB61_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %ecx, %edx, %ecx +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_16_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movw $-2, %dx +; X64-NOBMI2-NEXT: rolw %cl, %dx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB61_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB61_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: # 
kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movw $-2, %dx +; X64-BMI2-NEXT: rolw %cl, %dx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB61_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB61_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $1, %edx +; X64-BMI2-NEXT: shlxl %ecx, %edx, %ecx +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %not = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %not monotonic, align 2 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %2 = trunc i32 %shl4 to i16 + %conv5 = and i16 %1, %2 + ret i16 %conv5 +} + +define i16 @atomic_shl1_mask1_and_16_gpr_val(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_16_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB62_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB62_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $cx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB62_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB62_1 +; 
X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andb $15, %cl +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %ecx, %edx, %ecx +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_16_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB62_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB62_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB62_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB62_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andb $15, %cl +; X64-BMI2-NEXT: movl $1, %edx +; X64-BMI2-NEXT: shlxl %ecx, %edx, %ecx +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %2 = and i16 %c, 15 + %shl4 = shl nuw i16 1, %2 + %and = and i16 %1, %shl4 + ret i16 %and +} + +define i16 @atomic_shl1_mask01_and_16_gpr_val(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btrw %cx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btrw %cx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: retl +; +; 
X64-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrw %cx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrw %si, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %conv1 = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv7 = and i16 %1, %shl + ret i16 %conv7 +} + +define i16 @atomic_blsi_and_16_gpr_val(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_and_16_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: blsil %eax, %ecx +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB64_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB64_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_16_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %ecx +; X64-BMI2-NEXT: movl %ecx, %edx +; X64-BMI2-NEXT: notl %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB64_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB64_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i16 0, %c + %and = and i16 %sub, %c + %conv2 = xor i16 %and, -1 + %0 = atomicrmw and ptr %v, i16 %conv2 monotonic, align 2 + %conv9 = and i16 %0, %and + ret i16 %conv9 +} + +define i16 @atomic_shl1_and_16_gpr_valz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_16_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: 
.cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl $-2, %edi +; X86-NOBMI2-NEXT: roll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB65_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: andl %edi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB65_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: testl %ecx, %edx +; X86-NOBMI2-NEXT: sete %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_16_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl $-2, %edi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %edi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB65_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB65_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testl %ecx, %esi +; X86-BMI2-NEXT: sete %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_16_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl $-2, %esi +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: roll %cl, %esi +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB65_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl %esi, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB65_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: testl %ecx, %edx +; X64-NOBMI2-NEXT: sete %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; 
X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_16_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl $-2, %esi +; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI2-NEXT: roll %cl, %esi +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB65_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl %esi, %ecx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB65_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %ecx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testl %ecx, %edx +; X64-BMI2-NEXT: sete %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv5 = zext i1 %tobool.not to i16 + ret i16 %conv5 +} + +define i16 @atomic_shl1_mask0_and_16_gpr_valz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_16_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movw $-2, %si +; X86-NOBMI2-NEXT: rolw %cl, %si +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB66_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB66_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %edx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: btl %ecx, %edx +; X86-NOBMI2-NEXT: setae %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_16_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movw $-2, %si +; X86-BMI2-NEXT: rolw %cl, %si +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB66_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock 
cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB66_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: btl %ecx, %edx +; X86-BMI2-NEXT: setae %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_16_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movw $-2, %dx +; X64-NOBMI2-NEXT: rolw %cl, %dx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB66_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB66_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %edx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: btl %ecx, %edx +; X64-NOBMI2-NEXT: setae %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_16_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movw $-2, %dx +; X64-BMI2-NEXT: rolw %cl, %dx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB66_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB66_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: btl %ecx, %edx +; X64-BMI2-NEXT: setae %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %not = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %not monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv5 = zext i1 %tobool.not to i16 + ret i16 %conv5 +} + +define i16 @atomic_shl1_mask1_and_16_gpr_valz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_16_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB67_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; 
X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB67_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %edx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: btl %ecx, %edx +; X86-NOBMI2-NEXT: setae %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_16_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB67_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB67_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: btl %ecx, %edx +; X86-BMI2-NEXT: setae %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_16_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB67_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB67_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %edx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: btl %ecx, %edx +; X64-NOBMI2-NEXT: setae %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_16_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB67_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB67_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: andl $15, %ecx +; X64-BMI2-NEXT: xorl 
%eax, %eax +; X64-BMI2-NEXT: btl %ecx, %edx +; X64-BMI2-NEXT: setae %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %2 = and i16 %c, 15 + %sh_prom = zext i16 %2 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv5 = zext i1 %tobool.not to i16 + ret i16 %conv5 +} + +define i16 @atomic_shl1_mask01_and_16_gpr_valz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl $-2, %edi +; X86-NOBMI2-NEXT: roll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB68_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: andl %edi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB68_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: testl %ecx, %esi +; X86-NOBMI2-NEXT: sete %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andb $15, %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl $-2, %edi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %edi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB68_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB68_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testl %ecx, %esi +; X86-BMI2-NEXT: sete %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; 
X64-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl $-2, %esi +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: roll %cl, %esi +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB68_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl %esi, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB68_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: testl %ecx, %edx +; X64-NOBMI2-NEXT: sete %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: andb $15, %cl +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %ecx, %eax, %edx +; X64-BMI2-NEXT: movl $-2, %esi +; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI2-NEXT: roll %cl, %esi +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB68_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl %esi, %ecx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB68_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %ecx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testl %ecx, %edx +; X64-BMI2-NEXT: sete %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %1 = trunc i32 %shl to i16 + %conv1 = xor i16 %1, -1 + %2 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %2 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + %conv7 = zext i1 %tobool.not to i16 + ret i16 %conv7 +} + +define i16 @atomic_blsi_and_16_gpr_valz(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_and_16_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: blsil %eax, %ecx +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB69_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB69_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edx +; X86-BMI2-NEXT: xorl 
%eax, %eax +; X86-BMI2-NEXT: testl %edx, %ecx +; X86-BMI2-NEXT: sete %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_16_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: blsil %eax, %ecx +; X64-BMI2-NEXT: movl %ecx, %edx +; X64-BMI2-NEXT: notl %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB69_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB69_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testl %edx, %ecx +; X64-BMI2-NEXT: sete %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %0 = trunc i32 %and to i16 + %conv2 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %1 to i32 + %and8 = and i32 %and, %conv3 + %tobool.not = icmp eq i32 %and8, 0 + %conv9 = zext i1 %tobool.not to i16 + ret i16 %conv9 +} + +define i16 @atomic_shl1_and_16_gpr_valnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_16_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl $-2, %edi +; X86-NOBMI2-NEXT: roll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB70_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: andl %edi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB70_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: testl %ecx, %edx +; X86-NOBMI2-NEXT: setne %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_16_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl $-2, %edi +; X86-BMI2-NEXT: # kill: 
def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %edi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB70_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB70_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testl %ecx, %esi +; X86-BMI2-NEXT: setne %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_16_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl $-2, %esi +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: roll %cl, %esi +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB70_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl %esi, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB70_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: testl %ecx, %edx +; X64-NOBMI2-NEXT: setne %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_16_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl $-2, %esi +; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI2-NEXT: roll %cl, %esi +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB70_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl %esi, %ecx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB70_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %ecx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testl %ecx, %edx +; X64-BMI2-NEXT: setne %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv6 = zext i1 %tobool to i16 + ret i16 %conv6 +} + +define i16 @atomic_shl1_mask0_and_16_gpr_valnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_16_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; 
X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movw $-2, %si +; X86-NOBMI2-NEXT: rolw %cl, %si +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB71_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB71_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %edx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: btl %ecx, %edx +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_16_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movw $-2, %si +; X86-BMI2-NEXT: rolw %cl, %si +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB71_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB71_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: btl %ecx, %edx +; X86-BMI2-NEXT: setb %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_16_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movw $-2, %dx +; X64-NOBMI2-NEXT: rolw %cl, %dx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB71_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB71_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %edx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: btl %ecx, %edx +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_16_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movw $-2, %dx +; X64-BMI2-NEXT: rolw %cl, %dx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 
0x90 +; X64-BMI2-NEXT: .LBB71_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB71_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: btl %ecx, %edx +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %not = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %not monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv6 = zext i1 %tobool to i16 + ret i16 %conv6 +} + +define i16 @atomic_shl1_mask1_and_16_gpr_valnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_16_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB72_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB72_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %edx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: btl %ecx, %edx +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_16_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB72_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB72_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: btl %ecx, %edx +; X86-BMI2-NEXT: setb %al +; X86-BMI2-NEXT: # kill: def $ax killed 
$ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_16_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB72_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB72_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %edx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: btl %ecx, %edx +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_16_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB72_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB72_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: andl $15, %ecx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: btl %ecx, %edx +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %2 = and i16 %c, 15 + %sh_prom = zext i16 %2 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv6 = zext i1 %tobool to i16 + ret i16 %conv6 +} + +define i16 @atomic_shl1_mask01_and_16_gpr_valnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl $-2, %edi +; X86-NOBMI2-NEXT: roll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB73_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: andl %edi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne 
.LBB73_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: testl %ecx, %esi +; X86-NOBMI2-NEXT: setne %al +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andb $15, %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl $-2, %edi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %edi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB73_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB73_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testl %ecx, %esi +; X86-BMI2-NEXT: setne %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl $-2, %esi +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: roll %cl, %esi +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB73_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl %esi, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB73_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: testl %ecx, %edx +; X64-NOBMI2-NEXT: setne %al +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: andb $15, %cl +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %ecx, %eax, %edx +; X64-BMI2-NEXT: movl $-2, %esi +; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI2-NEXT: roll %cl, %esi +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB73_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl %esi, %ecx +; 
X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB73_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %ecx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testl %ecx, %edx +; X64-BMI2-NEXT: setne %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %1 = trunc i32 %shl to i16 + %conv1 = xor i16 %1, -1 + %2 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %2 to i32 + %and = and i32 %shl, %conv2 + %tobool = icmp ne i32 %and, 0 + %conv8 = zext i1 %tobool to i16 + ret i16 %conv8 +} + +define i16 @atomic_blsi_and_16_gpr_valnz(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_and_16_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: blsil %eax, %ecx +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB74_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB74_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: testl %edx, %ecx +; X86-BMI2-NEXT: setne %al +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_16_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: blsil %eax, %ecx +; X64-BMI2-NEXT: movl %ecx, %edx +; X64-BMI2-NEXT: notl %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB74_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB74_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testl %edx, %ecx +; X64-BMI2-NEXT: setne %al +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %0 = trunc i32 %and to i16 + %conv2 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %1 to i32 + %and8 = and i32 %and, %conv3 + %tobool = icmp ne i32 %and8, 0 + %conv10 = zext i1 %tobool to i16 + ret i16 %conv10 +} + +define i16 @atomic_shl1_and_16_gpr_br(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_16_gpr_br: +; X86-NOBMI2: # 
%bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl $-2, %edi +; X86-NOBMI2-NEXT: roll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB75_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %bx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB75_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: testl %eax, %esi +; X86-NOBMI2-NEXT: je .LBB75_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB75_5 +; X86-NOBMI2-NEXT: .LBB75_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB75_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_16_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl $-2, %edi +; X86-BMI2-NEXT: roll %cl, %edi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB75_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %edi, %ebx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %bx, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB75_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB75_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB75_5 +; X86-BMI2-NEXT: .LBB75_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB75_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_16_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; 
X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl $-2, %esi +; X64-NOBMI2-NEXT: roll %cl, %esi +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB75_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %r8d +; X64-NOBMI2-NEXT: andl %esi, %r8d +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %r8w, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB75_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: testl %eax, %edx +; X64-NOBMI2-NEXT: je .LBB75_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB75_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_16_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl $-2, %esi +; X64-BMI2-NEXT: roll %cl, %esi +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB75_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %r8d +; X64-BMI2-NEXT: andl %esi, %r8d +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %r8w, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB75_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %edx +; X64-BMI2-NEXT: je .LBB75_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %cx, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB75_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask0_and_16_gpr_br(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_16_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movw $-2, %si +; X86-NOBMI2-NEXT: rolw %cl, %si +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB76_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # 
kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB76_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: jae .LBB76_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB76_5 +; X86-NOBMI2-NEXT: .LBB76_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB76_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_16_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movw $-2, %si +; X86-BMI2-NEXT: rolw %cl, %si +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB76_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB76_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: btl %ecx, %eax +; X86-BMI2-NEXT: jae .LBB76_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB76_5 +; X86-BMI2-NEXT: .LBB76_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB76_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_16_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movw $-2, %dx +; X64-NOBMI2-NEXT: rolw %cl, %dx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB76_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB76_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: jae .LBB76_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB76_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_16_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movw $-2, %dx +; X64-BMI2-NEXT: rolw %cl, %dx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB76_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; 
X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB76_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: jae .LBB76_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %cx, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB76_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %not = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %not monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask1_and_16_gpr_br(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_16_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB77_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB77_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: movl %ecx, %esi +; X86-NOBMI2-NEXT: andl $15, %esi +; X86-NOBMI2-NEXT: btl %esi, %eax +; X86-NOBMI2-NEXT: jae .LBB77_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB77_5 +; X86-NOBMI2-NEXT: .LBB77_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB77_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_16_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB77_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; 
X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB77_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: andl $15, %esi +; X86-BMI2-NEXT: btl %esi, %eax +; X86-BMI2-NEXT: jae .LBB77_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB77_5 +; X86-BMI2-NEXT: .LBB77_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB77_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_16_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB77_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB77_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: movl %ecx, %edx +; X64-NOBMI2-NEXT: andl $15, %edx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: jae .LBB77_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB77_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_16_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB77_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB77_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: movl %ecx, %edx +; X64-BMI2-NEXT: andl $15, %edx +; X64-BMI2-NEXT: btl %edx, %eax +; X64-BMI2-NEXT: jae .LBB77_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %cx, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB77_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %2 = and i16 %c, 15 + %sh_prom = zext i16 %2 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv3 = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv3 + %3 = load i16, ptr %arrayidx, align 2 + br label %return 
+ +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %3, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask01_and_16_gpr_br(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ebx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl $-2, %edi +; X86-NOBMI2-NEXT: roll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB78_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: andl %edi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB78_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: testl %eax, %esi +; X86-NOBMI2-NEXT: je .LBB78_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %bx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB78_5 +; X86-NOBMI2-NEXT: .LBB78_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB78_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: andb $15, %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl $-2, %edi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %edi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB78_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB78_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB78_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %bx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB78_5 +; X86-BMI2-NEXT: .LBB78_3: +; 
X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB78_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl $-2, %r8d +; X64-NOBMI2-NEXT: roll %cl, %r8d +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB78_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl %r8d, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB78_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: testl %eax, %edx +; X64-NOBMI2-NEXT: je .LBB78_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %si, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB78_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: andb $15, %cl +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %ecx, %eax, %edx +; X64-BMI2-NEXT: movl $-2, %r8d +; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI2-NEXT: roll %cl, %r8d +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB78_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl %r8d, %ecx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB78_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %edx +; X64-BMI2-NEXT: je .LBB78_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB78_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %1 = trunc i32 %shl to i16 + %conv1 = xor i16 %1, -1 + %2 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %2 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %3 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %3, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_blsi_and_16_gpr_br(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_and_16_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; 
X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB79_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %edi, %ebx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %bx, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB79_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB79_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB79_5 +; X86-BMI2-NEXT: .LBB79_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB79_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_16_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl %si, %ecx +; X64-BMI2-NEXT: blsil %ecx, %edx +; X64-BMI2-NEXT: movl %edx, %esi +; X64-BMI2-NEXT: notl %esi +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB79_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %r8d +; X64-BMI2-NEXT: andl %esi, %r8d +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %r8w, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB79_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %edx +; X64-BMI2-NEXT: je .LBB79_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl (%rdi,%rcx,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB79_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %0 = trunc i32 %and to i16 + %conv2 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %1 to i32 + %and8 = and i32 %and, %conv3 + %tobool.not = icmp eq i32 %and8, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_and_16_gpr_brz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_16_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl 
{{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl $-2, %edi +; X86-NOBMI2-NEXT: roll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB80_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %bx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB80_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %edi +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: testl %edi, %esi +; X86-NOBMI2-NEXT: jne .LBB80_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: .LBB80_4: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_16_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl $-2, %edi +; X86-BMI2-NEXT: roll %cl, %edi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB80_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %edi, %ebx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %bx, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB80_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edi +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: testl %edi, %esi +; X86-BMI2-NEXT: jne .LBB80_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB80_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_16_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl $-2, %esi +; X64-NOBMI2-NEXT: roll %cl, %esi +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB80_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %r8d +; X64-NOBMI2-NEXT: andl %esi, %r8d +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %r8w, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def 
$eax +; X64-NOBMI2-NEXT: jne .LBB80_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %esi +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: testl %esi, %edx +; X64-NOBMI2-NEXT: je .LBB80_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB80_3: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl $-2, %esi +; X64-BMI2-NEXT: roll %cl, %esi +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB80_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %r8d +; X64-BMI2-NEXT: andl %esi, %r8d +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %r8w, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB80_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %esi +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: testl %esi, %edx +; X64-BMI2-NEXT: je .LBB80_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB80_3: # %if.then +; X64-BMI2-NEXT: movzwl %cx, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask0_and_16_gpr_brz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_16_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movw $-2, %si +; X86-NOBMI2-NEXT: rolw %cl, %si +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB81_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB81_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %esi +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: btl %ecx, %esi +; X86-NOBMI2-NEXT: jb .LBB81_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: .LBB81_4: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; 
X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_16_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movw $-2, %si +; X86-BMI2-NEXT: rolw %cl, %si +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB81_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB81_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %esi +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: btl %ecx, %esi +; X86-BMI2-NEXT: jb .LBB81_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB81_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_16_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movw $-2, %dx +; X64-NOBMI2-NEXT: rolw %cl, %dx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB81_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB81_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %edx +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: btl %ecx, %edx +; X64-NOBMI2-NEXT: jae .LBB81_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB81_3: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movw $-2, %dx +; X64-BMI2-NEXT: rolw %cl, %dx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB81_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB81_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: btl %ecx, %edx +; X64-BMI2-NEXT: jae .LBB81_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB81_3: # %if.then +; X64-BMI2-NEXT: movzwl %cx, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl 
nuw i16 1, %0 + %not = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %not monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask1_and_16_gpr_brz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_16_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB82_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB82_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %esi +; X86-NOBMI2-NEXT: movl %ecx, %edi +; X86-NOBMI2-NEXT: andl $15, %edi +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: btl %edi, %esi +; X86-NOBMI2-NEXT: jb .LBB82_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: .LBB82_4: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_16_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB82_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB82_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %esi +; X86-BMI2-NEXT: movl %ecx, %edi +; X86-BMI2-NEXT: andl $15, %edi +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: btl %edi, %esi +; X86-BMI2-NEXT: jb .LBB82_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB82_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; 
X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_16_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB82_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB82_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %edx +; X64-NOBMI2-NEXT: movl %ecx, %esi +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: btl %esi, %edx +; X64-NOBMI2-NEXT: jae .LBB82_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB82_3: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB82_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB82_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %edx +; X64-BMI2-NEXT: movl %ecx, %esi +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: btl %esi, %edx +; X64-BMI2-NEXT: jae .LBB82_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB82_3: # %if.then +; X64-BMI2-NEXT: movzwl %cx, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %2 = and i16 %c, 15 + %sh_prom = zext i16 %2 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv3 = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv3 + %3 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %3, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask01_and_16_gpr_brz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI2-NEXT: 
movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ebx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl $-2, %edi +; X86-NOBMI2-NEXT: roll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB83_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: andl %edi, %ecx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB83_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %ecx +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: testl %ecx, %esi +; X86-NOBMI2-NEXT: jne .LBB83_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movzwl %bx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: .LBB83_4: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: andb $15, %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl $-2, %edi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %edi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB83_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB83_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %ecx +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: testl %ecx, %esi +; X86-BMI2-NEXT: jne .LBB83_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %bx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB83_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl $-2, %r8d +; X64-NOBMI2-NEXT: roll %cl, %r8d +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB83_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, 
%ecx +; X64-NOBMI2-NEXT: andl %r8d, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB83_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %ecx +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: testl %ecx, %edx +; X64-NOBMI2-NEXT: je .LBB83_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB83_3: # %if.then +; X64-NOBMI2-NEXT: movzwl %si, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: andb $15, %cl +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %ecx, %eax, %edx +; X64-BMI2-NEXT: movl $-2, %r8d +; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI2-NEXT: roll %cl, %r8d +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB83_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl %r8d, %ecx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB83_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %ecx +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: testl %ecx, %edx +; X64-BMI2-NEXT: je .LBB83_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB83_3: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %1 = trunc i32 %shl to i16 + %conv1 = xor i16 %1, -1 + %2 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %2 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %3 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %3, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_blsi_and_16_gpr_brz(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_and_16_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB84_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %edi, %ebx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %bx, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB84_1 +; X86-BMI2-NEXT: # 
%bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %edi +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: testl %edi, %esi +; X86-BMI2-NEXT: jne .LBB84_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: .LBB84_4: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_16_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl %si, %ecx +; X64-BMI2-NEXT: blsil %ecx, %edx +; X64-BMI2-NEXT: movl %edx, %esi +; X64-BMI2-NEXT: notl %esi +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB84_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %r8d +; X64-BMI2-NEXT: andl %esi, %r8d +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %r8w, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB84_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %esi +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: testl %esi, %edx +; X64-BMI2-NEXT: je .LBB84_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB84_3: # %if.then +; X64-BMI2-NEXT: movzwl (%rdi,%rcx,2), %eax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %0 = trunc i32 %and to i16 + %conv2 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %1 to i32 + %and8 = and i32 %and, %conv3 + %tobool.not = icmp eq i32 %and8, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_and_16_gpr_brnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_16_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl $-2, %edi +; X86-NOBMI2-NEXT: roll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB85_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %bx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB85_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: testl %eax, %esi +; X86-NOBMI2-NEXT: je .LBB85_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then 
+; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB85_5 +; X86-NOBMI2-NEXT: .LBB85_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB85_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl $-2, %edi +; X86-BMI2-NEXT: roll %cl, %edi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB85_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %edi, %ebx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %bx, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB85_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB85_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB85_5 +; X86-BMI2-NEXT: .LBB85_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB85_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_16_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl $-2, %esi +; X64-NOBMI2-NEXT: roll %cl, %esi +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB85_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %r8d +; X64-NOBMI2-NEXT: andl %esi, %r8d +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %r8w, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB85_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: testl %eax, %edx +; X64-NOBMI2-NEXT: je .LBB85_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB85_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl $-2, %esi +; X64-BMI2-NEXT: roll %cl, %esi +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; 
X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB85_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %r8d +; X64-BMI2-NEXT: andl %esi, %r8d +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %r8w, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB85_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %edx +; X64-BMI2-NEXT: je .LBB85_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %cx, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB85_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask0_and_16_gpr_brnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_16_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movw $-2, %si +; X86-NOBMI2-NEXT: rolw %cl, %si +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB86_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB86_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: jae .LBB86_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB86_5 +; X86-NOBMI2-NEXT: .LBB86_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB86_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movw $-2, %si +; X86-BMI2-NEXT: rolw %cl, %si +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB86_1: # %atomicrmw.start +; 
X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB86_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: btl %ecx, %eax +; X86-BMI2-NEXT: jae .LBB86_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB86_5 +; X86-BMI2-NEXT: .LBB86_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB86_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_16_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movw $-2, %dx +; X64-NOBMI2-NEXT: rolw %cl, %dx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB86_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB86_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: jae .LBB86_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB86_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movw $-2, %dx +; X64-BMI2-NEXT: rolw %cl, %dx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB86_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB86_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: jae .LBB86_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %cx, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB86_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %shl = shl nuw i16 1, %0 + %not = xor i16 %shl, -1 + %1 = atomicrmw and ptr %v, i16 %not monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %conv3 = zext i16 %c to i32 + %shl4 = shl nuw i32 1, %conv3 + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 
@atomic_shl1_mask1_and_16_gpr_brnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_16_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB87_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB87_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: movl %ecx, %esi +; X86-NOBMI2-NEXT: andl $15, %esi +; X86-NOBMI2-NEXT: btl %esi, %eax +; X86-NOBMI2-NEXT: jae .LBB87_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %cx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB87_5 +; X86-NOBMI2-NEXT: .LBB87_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB87_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB87_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %di, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB87_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: andl $15, %esi +; X86-BMI2-NEXT: btl %esi, %eax +; X86-BMI2-NEXT: jae .LBB87_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB87_5 +; X86-BMI2-NEXT: .LBB87_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB87_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_16_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB87_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: 
movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB87_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: movl %ecx, %edx +; X64-NOBMI2-NEXT: andl $15, %edx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: jae .LBB87_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %cx, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB87_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB87_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %si, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB87_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: movl %ecx, %edx +; X64-BMI2-NEXT: andl $15, %edx +; X64-BMI2-NEXT: btl %edx, %eax +; X64-BMI2-NEXT: jae .LBB87_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %cx, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB87_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %shl = shl nuw i32 1, %conv + %0 = trunc i32 %shl to i16 + %conv1 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %1 to i32 + %2 = and i16 %c, 15 + %sh_prom = zext i16 %2 to i32 + %shl4 = shl nuw nsw i32 1, %sh_prom + %and = and i32 %shl4, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv3 = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv3 + %3 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %3, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_mask01_and_16_gpr_brnz(ptr %v, i16 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ebx, %ecx +; X86-NOBMI2-NEXT: andb $15, %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl $-2, %edi +; X86-NOBMI2-NEXT: roll %cl, %edi +; X86-NOBMI2-NEXT: movzwl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB88_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: andl %edi, %ecx +; 
X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB88_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movzwl %ax, %eax +; X86-NOBMI2-NEXT: testl %eax, %esi +; X86-NOBMI2-NEXT: je .LBB88_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movzwl %bx, %eax +; X86-NOBMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-NOBMI2-NEXT: jmp .LBB88_5 +; X86-NOBMI2-NEXT: .LBB88_3: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: .LBB88_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ebx, %ecx +; X86-BMI2-NEXT: andb $15, %cl +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl $-2, %edi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %edi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB88_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %cx, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB88_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB88_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %bx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB88_5 +; X86-BMI2-NEXT: .LBB88_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB88_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andb $15, %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl $-2, %r8d +; X64-NOBMI2-NEXT: roll %cl, %r8d +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB88_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl %r8d, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB88_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movzwl %ax, %eax +; X64-NOBMI2-NEXT: testl %eax, %edx +; 
X64-NOBMI2-NEXT: je .LBB88_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movzwl %si, %eax +; X64-NOBMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB88_3: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: andb $15, %cl +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %ecx, %eax, %edx +; X64-BMI2-NEXT: movl $-2, %r8d +; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI2-NEXT: roll %cl, %r8d +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB88_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl %r8d, %ecx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB88_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %edx +; X64-BMI2-NEXT: je .LBB88_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl %si, %eax +; X64-BMI2-NEXT: movzwl (%rdi,%rax,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB88_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = and i16 %c, 15 + %sh_prom = zext i16 %0 to i32 + %shl = shl nuw nsw i32 1, %sh_prom + %1 = trunc i32 %shl to i16 + %conv1 = xor i16 %1, -1 + %2 = atomicrmw and ptr %v, i16 %conv1 monotonic, align 2 + %conv2 = zext i16 %2 to i32 + %and = and i32 %shl, %conv2 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %conv + %3 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %3, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_blsi_and_16_gpr_brnz(ptr %v, i16 %c) { +; X86-BMI2-LABEL: atomic_blsi_and_16_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movzwl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB89_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %edi, %ebx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %bx, (%edx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB89_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movzwl %ax, %eax +; X86-BMI2-NEXT: testl %eax, %esi +; X86-BMI2-NEXT: je .LBB89_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movzwl %cx, %eax +; X86-BMI2-NEXT: movzwl (%edx,%eax,2), %eax +; X86-BMI2-NEXT: jmp .LBB89_5 +; X86-BMI2-NEXT: .LBB89_3: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: .LBB89_5: # %return +; 
X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_16_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl %si, %ecx +; X64-BMI2-NEXT: blsil %ecx, %edx +; X64-BMI2-NEXT: movl %edx, %esi +; X64-BMI2-NEXT: notl %esi +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB89_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %r8d +; X64-BMI2-NEXT: andl %esi, %r8d +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %r8w, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB89_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movzwl %ax, %eax +; X64-BMI2-NEXT: testl %eax, %edx +; X64-BMI2-NEXT: je .LBB89_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movzwl (%rdi,%rcx,2), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB89_3: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %conv = zext i16 %c to i32 + %sub = sub nsw i32 0, %conv + %and = and i32 %conv, %sub + %0 = trunc i32 %and to i16 + %conv2 = xor i16 %0, -1 + %1 = atomicrmw and ptr %v, i16 %conv2 monotonic, align 2 + %conv3 = zext i16 %1 to i32 + %and8 = and i32 %and, %conv3 + %tobool.not = icmp eq i32 %and8, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i16 %c to i64 + %arrayidx = getelementptr inbounds i16, ptr %v, i64 %idxprom + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_xor_16_const_val(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_16_const_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btcw $4, (%ecx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: shll $4, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_16_const_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: lock btcw $4, (%ecx) +; X86-BMI2-NEXT: setb %al +; X86-BMI2-NEXT: shll $4, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_16_const_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcw $4, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: shll $4, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_16_const_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcw $4, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shll $4, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i16 16 monotonic, align 2 + %1 = and i16 %0, 16 + ret i16 %1 +} + +define i16 @atomic_shl1_xor_16_const_valz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_16_const_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movzwl (%ecx), %eax +; 
X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB91_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: xorl $16, %edx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB91_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testb $16, %al +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_16_const_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB91_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl $16, %edx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB91_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $16, %al +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_16_const_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB91_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl $16, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB91_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testb $16, %al +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_16_const_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB91_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl $16, %ecx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB91_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testb $16, %al +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i16 16 monotonic, align 2 + %1 = lshr i16 %0, 4 + %.lobit = and i16 %1, 1 + %conv1 = xor i16 %.lobit, 1 + ret i16 %conv1 +} + +define i16 @atomic_shl1_xor_16_const_valnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_16_const_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movzwl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB92_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: xorl $16, %edx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-NOBMI2-NEXT: # kill: def 
$ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB92_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: shrl $4, %eax +; X86-NOBMI2-NEXT: andl $1, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_16_const_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB92_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl $16, %edx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB92_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrl $4, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_16_const_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB92_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl $16, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB92_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: shrl $4, %eax +; X64-NOBMI2-NEXT: andl $1, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_16_const_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB92_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl $16, %ecx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB92_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrl $4, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i16 16 monotonic, align 2 + %1 = lshr i16 %0, 4 + %.lobit = and i16 %1, 1 + ret i16 %.lobit +} + +define i16 @atomic_shl1_xor_16_const_br(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_16_const_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: lock btcw $4, (%eax) +; X86-NOBMI2-NEXT: jae .LBB93_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movzwl 8(%eax), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB93_1: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_16_const_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: lock btcw $4, (%eax) +; X86-BMI2-NEXT: jae .LBB93_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movzwl 8(%eax), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB93_1: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_16_const_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btcw $4, (%rdi) +; X64-NOBMI2-NEXT: jae 
.LBB93_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movzwl 8(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB93_1: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_16_const_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btcw $4, (%rdi) +; X64-BMI2-NEXT: jae .LBB93_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movzwl 8(%rdi), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB93_1: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i16 16 monotonic, align 2 + %1 = and i16 %0, 16 + %tobool.not = icmp eq i16 %1, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i16, ptr %v, i64 4 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_xor_16_const_brz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_16_const_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: lock btcw $4, (%ecx) +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: jae .LBB94_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB94_1: # %if.then +; X86-NOBMI2-NEXT: movzwl 8(%ecx), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_16_const_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: lock btcw $4, (%ecx) +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: jae .LBB94_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB94_1: # %if.then +; X86-BMI2-NEXT: movzwl 8(%ecx), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_16_const_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btcw $4, (%rdi) +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: jae .LBB94_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB94_1: # %if.then +; X64-NOBMI2-NEXT: movzwl 8(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_16_const_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btcw $4, (%rdi) +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: jae .LBB94_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB94_1: # %if.then +; X64-BMI2-NEXT: movzwl 8(%rdi), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i16 16 monotonic, align 2 + %1 = and i16 %0, 16 + %tobool.not = icmp eq i16 %1, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i16, ptr %v, i64 4 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_xor_16_const_brnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_16_const_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: lock btcw $4, (%eax) +; X86-NOBMI2-NEXT: jae .LBB95_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movzwl 8(%eax), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB95_1: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_16_const_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl 
{{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: lock btcw $4, (%eax) +; X86-BMI2-NEXT: jae .LBB95_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movzwl 8(%eax), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB95_1: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_16_const_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btcw $4, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB95_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movzwl 8(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB95_1: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_16_const_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btcw $4, (%rdi) +; X64-BMI2-NEXT: jae .LBB95_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movzwl 8(%rdi), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB95_1: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i16 16 monotonic, align 2 + %1 = and i16 %0, 16 + %tobool.not = icmp eq i16 %1, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i16, ptr %v, i64 4 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_or_16_const_val(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_or_16_const_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btsw $4, (%ecx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: shll $4, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_16_const_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: lock btsw $4, (%ecx) +; X86-BMI2-NEXT: setb %al +; X86-BMI2-NEXT: shll $4, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_16_const_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsw $4, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: shll $4, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_16_const_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsw $4, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shll $4, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i16 16 monotonic, align 2 + %1 = and i16 %0, 16 + ret i16 %1 +} + +define i16 @atomic_shl1_or_16_const_valz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_or_16_const_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movzwl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB97_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: orl $16, %edx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB97_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: 
xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testb $16, %al +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_16_const_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB97_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: orl $16, %edx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB97_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $16, %al +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_16_const_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB97_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl $16, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB97_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testb $16, %al +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_16_const_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB97_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: orl $16, %ecx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB97_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testb $16, %al +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i16 16 monotonic, align 2 + %1 = lshr i16 %0, 4 + %.lobit = and i16 %1, 1 + %conv1 = xor i16 %.lobit, 1 + ret i16 %conv1 +} + +define i16 @atomic_shl1_or_16_const_valnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_or_16_const_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movzwl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB98_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: orl $16, %edx +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB98_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: shrl $4, %eax +; X86-NOBMI2-NEXT: andl $1, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_16_const_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: 
.LBB98_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: orl $16, %edx +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB98_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrl $4, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_16_const_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB98_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl $16, %ecx +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB98_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: shrl $4, %eax +; X64-NOBMI2-NEXT: andl $1, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_16_const_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB98_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: orl $16, %ecx +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB98_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrl $4, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i16 16 monotonic, align 2 + %1 = lshr i16 %0, 4 + %.lobit = and i16 %1, 1 + ret i16 %.lobit +} + +define i16 @atomic_shl1_or_16_const_br(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_or_16_const_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: lock btsw $4, (%eax) +; X86-NOBMI2-NEXT: jae .LBB99_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movzwl 8(%eax), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB99_1: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_16_const_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: lock btsw $4, (%eax) +; X86-BMI2-NEXT: jae .LBB99_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movzwl 8(%eax), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB99_1: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_16_const_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btsw $4, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB99_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movzwl 8(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB99_1: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_16_const_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btsw $4, (%rdi) +; X64-BMI2-NEXT: jae .LBB99_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movzwl 8(%rdi), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB99_1: +; 
X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i16 16 monotonic, align 2 + %1 = and i16 %0, 16 + %tobool.not = icmp eq i16 %1, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i16, ptr %v, i64 4 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_or_16_const_brz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_or_16_const_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: lock btsw $4, (%ecx) +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: jae .LBB100_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB100_1: # %if.then +; X86-NOBMI2-NEXT: movzwl 8(%ecx), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_16_const_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: lock btsw $4, (%ecx) +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: jae .LBB100_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB100_1: # %if.then +; X86-BMI2-NEXT: movzwl 8(%ecx), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_16_const_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btsw $4, (%rdi) +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: jae .LBB100_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB100_1: # %if.then +; X64-NOBMI2-NEXT: movzwl 8(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_16_const_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btsw $4, (%rdi) +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: jae .LBB100_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB100_1: # %if.then +; X64-BMI2-NEXT: movzwl 8(%rdi), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i16 16 monotonic, align 2 + %1 = and i16 %0, 16 + %tobool.not = icmp eq i16 %1, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i16, ptr %v, i64 4 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_or_16_const_brnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_or_16_const_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: lock btsw $4, (%eax) +; X86-NOBMI2-NEXT: jae .LBB101_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movzwl 8(%eax), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB101_1: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_16_const_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: lock btsw $4, (%eax) +; X86-BMI2-NEXT: jae .LBB101_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movzwl 8(%eax), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB101_1: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_16_const_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btsw $4, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB101_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: 
movzwl 8(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB101_1: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_16_const_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btsw $4, (%rdi) +; X64-BMI2-NEXT: jae .LBB101_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movzwl 8(%rdi), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB101_1: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i16 16 monotonic, align 2 + %1 = and i16 %0, 16 + %tobool.not = icmp eq i16 %1, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i16, ptr %v, i64 4 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_and_16_const_val(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_16_const_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btrw $4, (%ecx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: shll $4, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_16_const_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: lock btrw $4, (%ecx) +; X86-BMI2-NEXT: setb %al +; X86-BMI2-NEXT: shll $4, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_16_const_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrw $4, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: shll $4, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_16_const_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrw $4, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shll $4, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i16 -17 monotonic, align 2 + %1 = and i16 %0, 16 + ret i16 %1 +} + +define i16 @atomic_shl1_and_16_const_valz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_16_const_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movzwl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB103_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $65519, %edx # imm = 0xFFEF +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB103_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testb $16, %al +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_16_const_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB103_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl 
%eax, %edx +; X86-BMI2-NEXT: andl $65519, %edx # imm = 0xFFEF +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB103_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $16, %al +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_16_const_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB103_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl $65519, %ecx # imm = 0xFFEF +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB103_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testb $16, %al +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_16_const_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB103_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl $65519, %ecx # imm = 0xFFEF +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB103_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testb $16, %al +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i16 -17 monotonic, align 2 + %1 = lshr i16 %0, 4 + %.lobit = and i16 %1, 1 + %conv1 = xor i16 %.lobit, 1 + ret i16 %conv1 +} + +define i16 @atomic_shl1_and_16_const_valnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_16_const_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movzwl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB104_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $65519, %edx # imm = 0xFFEF +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-NOBMI2-NEXT: jne .LBB104_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: shrl $4, %eax +; X86-NOBMI2-NEXT: andl $1, %eax +; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_16_const_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzwl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB104_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $65519, %edx # imm = 0xFFEF +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: lock cmpxchgw %dx, (%ecx) +; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X86-BMI2-NEXT: jne .LBB104_1 +; X86-BMI2-NEXT: # %bb.2: 
# %atomicrmw.end +; X86-BMI2-NEXT: shrl $4, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_16_const_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movzwl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB104_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl $65519, %ecx # imm = 0xFFEF +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-NOBMI2-NEXT: jne .LBB104_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: shrl $4, %eax +; X64-NOBMI2-NEXT: andl $1, %eax +; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_16_const_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB104_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl $65519, %ecx # imm = 0xFFEF +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) +; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax +; X64-BMI2-NEXT: jne .LBB104_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrl $4, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i16 -17 monotonic, align 2 + %1 = lshr i16 %0, 4 + %.lobit = and i16 %1, 1 + ret i16 %.lobit +} + +define i16 @atomic_shl1_and_16_const_br(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_16_const_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: lock btrw $4, (%eax) +; X86-NOBMI2-NEXT: jae .LBB105_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movzwl 8(%eax), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB105_1: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_16_const_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: lock btrw $4, (%eax) +; X86-BMI2-NEXT: jae .LBB105_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movzwl 8(%eax), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB105_1: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_16_const_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btrw $4, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB105_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movzwl 8(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB105_1: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_16_const_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btrw $4, (%rdi) +; X64-BMI2-NEXT: jae .LBB105_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movzwl 8(%rdi), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB105_1: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i16 -17 monotonic, align 2 + %1 = and i16 %0, 16 + %tobool.not = icmp eq i16 %1, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i16, ptr %v, 
i64 4 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_and_16_const_brz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_16_const_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: lock btrw $4, (%ecx) +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: jae .LBB106_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB106_1: # %if.then +; X86-NOBMI2-NEXT: movzwl 8(%ecx), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_16_const_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: lock btrw $4, (%ecx) +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: jae .LBB106_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB106_1: # %if.then +; X86-BMI2-NEXT: movzwl 8(%ecx), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_16_const_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btrw $4, (%rdi) +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: jae .LBB106_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB106_1: # %if.then +; X64-NOBMI2-NEXT: movzwl 8(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_16_const_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btrw $4, (%rdi) +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: jae .LBB106_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB106_1: # %if.then +; X64-BMI2-NEXT: movzwl 8(%rdi), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i16 -17 monotonic, align 2 + %1 = and i16 %0, 16 + %tobool.not = icmp eq i16 %1, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i16, ptr %v, i64 4 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i16 @atomic_shl1_and_16_const_brnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_16_const_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: lock btrw $4, (%eax) +; X86-NOBMI2-NEXT: jae .LBB107_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movzwl 8(%eax), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB107_1: +; X86-NOBMI2-NEXT: movw $123, %ax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_16_const_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: lock btrw $4, (%eax) +; X86-BMI2-NEXT: jae .LBB107_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movzwl 8(%eax), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB107_1: +; X86-BMI2-NEXT: movw $123, %ax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_16_const_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btrw $4, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB107_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movzwl 8(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB107_1: +; X64-NOBMI2-NEXT: movw $123, %ax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_16_const_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btrw $4, (%rdi) +; X64-BMI2-NEXT: jae 
.LBB107_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movzwl 8(%rdi), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB107_1: +; X64-BMI2-NEXT: movw $123, %ax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i16 -17 monotonic, align 2 + %1 = and i16 %0, 16 + %tobool.not = icmp eq i16 %1, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i16, ptr %v, i64 4 + %2 = load i16, ptr %arrayidx, align 2 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i16 [ %2, %if.then ], [ 123, %entry ] + ret i16 %retval.0 +} + +define i32 @atomic_shl1_xor_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btcl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btcl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + ret i32 %and +} + +define i32 @atomic_shl2_xor_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_xor_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB109_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-NOBMI2-NEXT: jne .LBB109_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_xor_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $2, %edx +; 
X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB109_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: xorl %edx, %esi +; X86-BMI2-NEXT: lock cmpxchgl %esi, (%ecx) +; X86-BMI2-NEXT: jne .LBB109_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_xor_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB109_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB109_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_xor_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB109_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB109_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + ret i32 %and +} + +define i32 @atomic_shl1_neq_xor_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB110_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB110_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), 
%ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB110_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB110_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %ecx, %edx, %ecx +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB110_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB110_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB110_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB110_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %sil +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %esi, %ecx, %ecx +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %add = add i32 %c, 1 + %shl1 = shl nuw i32 1, %add + %and = and i32 %0, %shl1 + ret i32 %and +} + +define i32 @atomic_shl1_small_mask_xor_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btcl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btcl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; 
X64-NOBMI2-NEXT: lock btcl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + ret i32 %and +} + +define i32 @atomic_shl1_mask0_xor_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btcl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btcl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %1, %shl1 + ret i32 %and +} + +define i32 @atomic_shl1_mask1_xor_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btcl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btcl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al 
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %1 = and i32 %c, 31 + %shl1 = shl nuw i32 1, %1 + %and = and i32 %0, %shl1 + ret i32 %and +} + +define i32 @atomic_shl1_mask01_xor_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btcl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btcl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + ret i32 %and +} + +define i32 @atomic_blsi_xor_32_gpr_val(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_xor_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB115_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: xorl %ecx, %esi +; X86-BMI2-NEXT: lock cmpxchgl %esi, (%edx) +; X86-BMI2-NEXT: jne .LBB115_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB115_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; 
X64-BMI2-NEXT: jne .LBB115_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw xor ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + ret i32 %and3 +} + +define i32 @atomic_shl1_xor_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB116_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB116_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB116_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB116_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB116_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB116_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax 
+; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB116_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB116_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %1 = xor i32 %0, -1 + %2 = lshr i32 %1, %c + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl2_xor_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_xor_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB117_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-NOBMI2-NEXT: jne .LBB117_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testl %edx, %eax +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_xor_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $2, %edx +; X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB117_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: xorl %edx, %esi +; X86-BMI2-NEXT: lock cmpxchgl %esi, (%ecx) +; X86-BMI2-NEXT: jne .LBB117_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testl %edx, %eax +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_xor_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB117_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB117_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testl %edx, %eax +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_xor_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax 
+; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB117_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl %edx, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB117_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + %lnot.ext = zext i1 %tobool.not to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_neq_xor_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB118_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB118_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movzbl %cl, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: btl %edx, %eax +; X86-NOBMI2-NEXT: setae %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB118_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB118_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movzbl %cl, %edx +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB118_1: # 
%atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB118_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movzbl %cl, %edx +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: setae %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB118_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB118_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %sil +; X64-BMI2-NEXT: movzbl %sil, %edx +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %edx, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %add = add i32 %c, 1 + %1 = xor i32 %0, -1 + %2 = lshr i32 %1, %add + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_small_mask_xor_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB119_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB119_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl $15, %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB119_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne 
.LBB119_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB119_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB119_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB119_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB119_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %0 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask0_xor_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB120_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB120_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; 
X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB120_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB120_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB120_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB120_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB120_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB120_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %c + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask1_xor_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB121_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB121_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; 
X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB121_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB121_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB121_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB121_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB121_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB121_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %1 = and i32 %c, 31 + %2 = xor i32 %0, -1 + %3 = lshr i32 %2, %1 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask01_xor_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; 
X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB122_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB122_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB122_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB122_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB122_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB122_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB122_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB122_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %0 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_blsi_xor_32_gpr_valz(ptr %v, i32 %c) { 
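+; Note: blsi computes c & -c, which isolates the lowest set bit of c but is
+; zero when c itself is zero, so this pattern is not converted to a bit-test
+; instruction; the cmpxchg expansion below is the expected lowering.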
+; X86-BMI2-LABEL: atomic_blsi_xor_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: blsil {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB123_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: xorl %edx, %esi +; X86-BMI2-NEXT: lock cmpxchgl %esi, (%ecx) +; X86-BMI2-NEXT: jne .LBB123_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testl %edx, %eax +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB123_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl %edx, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB123_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw xor ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + %lnot.ext = zext i1 %tobool.not to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_xor_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB124_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB124_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB124_1: # %atomicrmw.start +; 
X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB124_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB124_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB124_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB124_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB124_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %1 = lshr i32 %0, %c + %lnot.ext = and i32 %1, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl2_xor_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_xor_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB125_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-NOBMI2-NEXT: jne .LBB125_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testl %edx, %eax +; X86-NOBMI2-NEXT: setne %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_xor_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $2, %edx +; X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB125_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: 
Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: xorl %edx, %esi +; X86-BMI2-NEXT: lock cmpxchgl %esi, (%ecx) +; X86-BMI2-NEXT: jne .LBB125_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testl %edx, %eax +; X86-BMI2-NEXT: setne %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_xor_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB125_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl %edx, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB125_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testl %edx, %eax +; X64-NOBMI2-NEXT: setne %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_xor_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB125_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl %edx, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB125_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: setne %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool = icmp ne i32 %and, 0 + %lnot.ext = zext i1 %tobool to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_neq_xor_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB126_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB126_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movzbl %cl, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: btl %edx, %eax +; X86-NOBMI2-NEXT: setb %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; 
X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB126_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB126_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB126_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB126_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movzbl %cl, %edx +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: setb %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB126_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB126_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %sil +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %add = add i32 %c, 1 + %1 = lshr i32 %0, %add + %lnot.ext = and i32 %1, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_small_mask_xor_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB127_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB127_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; 
X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB127_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB127_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB127_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB127_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB127_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB127_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %2 = lshr i32 %1, %0 + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask0_xor_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi 
+; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB128_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB128_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB128_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB128_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB128_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB128_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB128_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB128_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %2 = lshr i32 %1, %c + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask1_xor_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB129_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB129_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB129_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB129_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB129_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB129_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB129_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB129_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %1 = 
and i32 %c, 31 + %2 = lshr i32 %0, %1 + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask01_xor_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB130_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB130_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB130_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB130_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB130_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB130_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB130_1: # %atomicrmw.start 
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB130_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %2 = lshr i32 %1, %0 + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_blsi_xor_32_gpr_valnz(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_xor_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: blsil {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB131_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: xorl %edx, %esi +; X86-BMI2-NEXT: lock cmpxchgl %esi, (%ecx) +; X86-BMI2-NEXT: jne .LBB131_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testl %edx, %eax +; X86-BMI2-NEXT: setne %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB131_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl %edx, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB131_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: setne %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw xor ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool = icmp ne i32 %and3, 0 + %lnot.ext = zext i1 %tobool to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_xor_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btcl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB132_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB132_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btcl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB132_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB132_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl 
$31, %eax +; X64-NOBMI2-NEXT: lock btcl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB132_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB132_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btcl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB132_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB132_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl2_xor_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_xor_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $2, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB133_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB133_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %esi, %eax +; X86-NOBMI2-NEXT: je .LBB133_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: jmp .LBB133_5 +; X86-NOBMI2-NEXT: .LBB133_3: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB133_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_xor_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB133_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB133_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %esi, %eax +; X86-BMI2-NEXT: je .LBB133_3 +; 
X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB133_5 +; X86-BMI2-NEXT: .LBB133_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB133_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_xor_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB133_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB133_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: testl %edx, %eax +; X64-NOBMI2-NEXT: je .LBB133_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB133_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_xor_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB133_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB133_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testl %ecx, %eax +; X64-BMI2-NEXT: je .LBB133_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB133_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_xor_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB134_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB134_1 +; 
X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %ecx, %ebx +; X86-NOBMI2-NEXT: incb %bl +; X86-NOBMI2-NEXT: movzbl %bl, %esi +; X86-NOBMI2-NEXT: btl %esi, %eax +; X86-NOBMI2-NEXT: jae .LBB134_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: jmp .LBB134_5 +; X86-NOBMI2-NEXT: .LBB134_3: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB134_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB134_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB134_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl %ecx, %ebx +; X86-BMI2-NEXT: incb %bl +; X86-BMI2-NEXT: movzbl %bl, %esi +; X86-BMI2-NEXT: btl %esi, %eax +; X86-BMI2-NEXT: jae .LBB134_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB134_5 +; X86-BMI2-NEXT: .LBB134_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB134_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB134_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB134_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %edx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: jae .LBB134_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB134_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB134_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop 
Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB134_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rsi), %ecx +; X64-BMI2-NEXT: movzbl %cl, %ecx +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: jae .LBB134_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB134_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %add = add i32 %c, 1 + %shl1 = shl nuw i32 1, %add + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_xor_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: lock btcl %ecx, (%eax) +; X86-NOBMI2-NEXT: jae .LBB135_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB135_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: lock btcl %ecx, (%eax) +; X86-BMI2-NEXT: jae .LBB135_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB135_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: lock btcl %esi, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB135_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB135_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: lock btcl %esi, (%rdi) +; X64-BMI2-NEXT: jae .LBB135_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB135_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret 
i32 %retval.0 +} + +define i32 @atomic_shl1_mask0_xor_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btcl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB136_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB136_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btcl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB136_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB136_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btcl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB136_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB136_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btcl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB136_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB136_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask1_xor_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btcl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB137_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB137_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btcl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB137_1 +; X86-BMI2-NEXT: # %bb.2: # 
%if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB137_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btcl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB137_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB137_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btcl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB137_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB137_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_xor_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btcl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB138_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB138_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btcl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB138_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB138_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btcl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB138_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB138_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btcl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB138_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl 
(%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB138_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_blsi_xor_32_gpr_br(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_xor_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB139_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB139_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %esi, %eax +; X86-BMI2-NEXT: je .LBB139_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB139_5 +; X86-BMI2-NEXT: .LBB139_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB139_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB139_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB139_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testl %ecx, %eax +; X64-BMI2-NEXT: je .LBB139_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB139_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw xor ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_xor_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; 
X86-NOBMI2-NEXT: lock btcl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB140_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB140_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btcl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB140_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB140_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btcl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB140_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB140_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btcl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB140_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB140_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl2_xor_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_xor_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB141_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: xorl %edi, %edx +; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%esi) +; X86-NOBMI2-NEXT: jne .LBB141_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $123, %edx +; X86-NOBMI2-NEXT: testl %edi, %eax +; X86-NOBMI2-NEXT: jne .LBB141_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movl (%esi,%ecx,4), %edx +; X86-NOBMI2-NEXT: .LBB141_4: # %return +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_xor_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %edi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB141_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-BMI2-NEXT: jne .LBB141_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: testl %edi, %eax +; X86-BMI2-NEXT: jne .LBB141_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edx,4), %ecx +; X86-BMI2-NEXT: .LBB141_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_xor_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %esi +; X64-NOBMI2-NEXT: shll %cl, %esi +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB141_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %edx +; X64-NOBMI2-NEXT: xorl %esi, %edx +; X64-NOBMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB141_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %edx +; X64-NOBMI2-NEXT: testl %esi, %eax +; X64-NOBMI2-NEXT: je .LBB141_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB141_3: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %edx +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_xor_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB141_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl %edx, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB141_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: je .LBB141_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB141_3: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %ecx +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + 
%retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_xor_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB142_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %edx, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%esi) +; X86-NOBMI2-NEXT: jne .LBB142_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %ecx, %edx +; X86-NOBMI2-NEXT: incb %dl +; X86-NOBMI2-NEXT: movzbl %dl, %edi +; X86-NOBMI2-NEXT: movl $123, %edx +; X86-NOBMI2-NEXT: btl %edi, %eax +; X86-NOBMI2-NEXT: jb .LBB142_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movl (%esi,%ecx,4), %edx +; X86-NOBMI2-NEXT: .LBB142_4: # %return +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %ecx +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB142_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %ecx, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%esi) +; X86-BMI2-NEXT: jne .LBB142_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movzbl %cl, %edi +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: btl %edi, %eax +; X86-BMI2-NEXT: jb .LBB142_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edx,4), %ecx +; X86-BMI2-NEXT: .LBB142_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB142_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB142_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %esi +; X64-NOBMI2-NEXT: movl 
$123, %edx +; X64-NOBMI2-NEXT: btl %esi, %eax +; X64-NOBMI2-NEXT: jae .LBB142_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB142_3: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %edx +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB142_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB142_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rsi), %ecx +; X64-BMI2-NEXT: movzbl %cl, %edx +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: btl %edx, %eax +; X64-BMI2-NEXT: jae .LBB142_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB142_3: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %ecx +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %add = add i32 %c, 1 + %shl1 = shl nuw i32 1, %add + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_xor_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: andl $15, %edx +; X86-NOBMI2-NEXT: lock btcl %edx, (%ecx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB143_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB143_1: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%edx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl $15, %edx +; X86-BMI2-NEXT: lock btcl %edx, (%ecx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB143_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB143_1: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%edx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: lock btcl %esi, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB143_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB143_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_brz: +; X64-BMI2: # %bb.0: # 
%entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: lock btcl %esi, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB143_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB143_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask0_xor_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btcl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB144_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB144_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btcl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB144_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB144_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btcl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB144_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB144_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btcl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB144_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB144_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 
@atomic_shl1_mask1_xor_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btcl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB145_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB145_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btcl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB145_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB145_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btcl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB145_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB145_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btcl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB145_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB145_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_xor_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btcl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB146_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB146_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btcl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; 
X86-BMI2-NEXT: jae .LBB146_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB146_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btcl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB146_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB146_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btcl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB146_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB146_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_blsi_xor_32_gpr_brz(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_xor_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: blsil %edx, %edi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB147_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-BMI2-NEXT: jne .LBB147_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: testl %edi, %eax +; X86-BMI2-NEXT: jne .LBB147_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edx,4), %ecx +; X86-BMI2-NEXT: .LBB147_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB147_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl %edx, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB147_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: je .LBB147_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; 
X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB147_3: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %ecx +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw xor ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_xor_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btcl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB148_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB148_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btcl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB148_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB148_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btcl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB148_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB148_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btcl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB148_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB148_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl2_xor_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_xor_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $2, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB149_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB149_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %esi, %eax +; X86-NOBMI2-NEXT: je .LBB149_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: jmp .LBB149_5 +; X86-NOBMI2-NEXT: .LBB149_3: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB149_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_xor_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB149_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB149_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %esi, %eax +; X86-BMI2-NEXT: je .LBB149_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB149_5 +; X86-BMI2-NEXT: .LBB149_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB149_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_xor_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB149_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB149_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: testl %edx, %eax +; X64-NOBMI2-NEXT: je .LBB149_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB149_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_xor_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB149_1: # %atomicrmw.start +; X64-BMI2-NEXT: 
# =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB149_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testl %ecx, %eax +; X64-BMI2-NEXT: je .LBB149_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB149_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_xor_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB150_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: xorl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB150_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %ecx, %ebx +; X86-NOBMI2-NEXT: incb %bl +; X86-NOBMI2-NEXT: movzbl %bl, %esi +; X86-NOBMI2-NEXT: btl %esi, %eax +; X86-NOBMI2-NEXT: jae .LBB150_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: jmp .LBB150_5 +; X86-NOBMI2-NEXT: .LBB150_3: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB150_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB150_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl 
%edi, (%edx) +; X86-BMI2-NEXT: jne .LBB150_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl %ecx, %ebx +; X86-BMI2-NEXT: incb %bl +; X86-BMI2-NEXT: movzbl %bl, %esi +; X86-BMI2-NEXT: btl %esi, %eax +; X86-BMI2-NEXT: jae .LBB150_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB150_5 +; X86-BMI2-NEXT: .LBB150_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB150_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB150_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: xorl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB150_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %edx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: jae .LBB150_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB150_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB150_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB150_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rsi), %ecx +; X64-BMI2-NEXT: movzbl %cl, %ecx +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: jae .LBB150_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB150_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %add = add i32 %c, 1 + %shl1 = shl nuw i32 1, %add + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_xor_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: lock btcl %ecx, (%eax) +; X86-NOBMI2-NEXT: jae .LBB151_1 +; X86-NOBMI2-NEXT: # %bb.2: # 
%if.then +; X86-NOBMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB151_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: lock btcl %ecx, (%eax) +; X86-BMI2-NEXT: jae .LBB151_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB151_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: lock btcl %esi, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB151_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB151_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: lock btcl %esi, (%rdi) +; X64-BMI2-NEXT: jae .LBB151_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB151_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask0_xor_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btcl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB152_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB152_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btcl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB152_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB152_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btcl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB152_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; 
X64-NOBMI2-NEXT: .LBB152_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btcl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB152_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB152_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask1_xor_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btcl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB153_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB153_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btcl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB153_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB153_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btcl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB153_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB153_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btcl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB153_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB153_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 
+ br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_xor_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btcl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB154_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB154_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btcl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB154_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB154_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btcl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB154_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB154_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btcl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB154_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB154_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw xor ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_blsi_xor_32_gpr_brnz(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_xor_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB155_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: xorl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB155_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end 
+; X86-BMI2-NEXT: testl %esi, %eax +; X86-BMI2-NEXT: je .LBB155_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB155_5 +; X86-BMI2-NEXT: .LBB155_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB155_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB155_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: xorl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB155_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testl %ecx, %eax +; X64-BMI2-NEXT: je .LBB155_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB155_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw xor ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_or_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btsl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + ret i32 %and +} + +define i32 @atomic_shl2_or_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_or_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; 
X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB157_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: orl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-NOBMI2-NEXT: jne .LBB157_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_or_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $2, %edx +; X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB157_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: orl %edx, %esi +; X86-BMI2-NEXT: lock cmpxchgl %esi, (%ecx) +; X86-BMI2-NEXT: jne .LBB157_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_or_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB157_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl %edx, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB157_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_or_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB157_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB157_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + ret i32 %and +} + +define i32 @atomic_shl1_neq_or_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll 
%cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB158_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB158_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB158_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB158_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %ecx, %edx, %ecx +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB158_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB158_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB158_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB158_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %sil +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxl %esi, %ecx, %ecx +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %add = add i32 %c, 1 + %shl1 = shl nuw i32 1, %add + %and = and i32 %0, %shl1 + ret i32 %and +} + +define i32 
@atomic_shl1_small_mask_or_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btsl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + ret i32 %and +} + +define i32 @atomic_shl1_mask0_or_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btsl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %1, %shl1 + ret i32 %and +} + +define i32 @atomic_shl1_mask1_or_32_gpr_val(ptr %v, i32 %c) { +; 
X86-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btsl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %1 = and i32 %c, 31 + %shl1 = shl nuw i32 1, %1 + %and = and i32 %0, %shl1 + ret i32 %and +} + +define i32 @atomic_shl1_mask01_or_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btsl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + ret i32 %and +} + +define i32 @atomic_blsi_or_32_gpr_val(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_or_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: 
pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB163_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: orl %ecx, %esi +; X86-BMI2-NEXT: lock cmpxchgl %esi, (%edx) +; X86-BMI2-NEXT: jne .LBB163_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB163_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB163_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw or ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + ret i32 %and3 +} + +define i32 @atomic_shl1_or_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB164_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB164_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB164_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB164_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: 
popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB164_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB164_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB164_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB164_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %1 = xor i32 %0, -1 + %2 = lshr i32 %1, %c + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl2_or_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_or_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB165_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: orl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-NOBMI2-NEXT: jne .LBB165_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testl %edx, %eax +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_or_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $2, %edx +; X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB165_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: orl %edx, %esi +; X86-BMI2-NEXT: lock cmpxchgl %esi, (%ecx) +; X86-BMI2-NEXT: jne .LBB165_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testl %edx, %eax +; 
X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_or_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB165_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl %edx, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB165_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testl %edx, %eax +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_or_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB165_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: orl %edx, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB165_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + %lnot.ext = zext i1 %tobool.not to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_neq_or_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB166_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB166_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movzbl %cl, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: btl %edx, %eax +; X86-NOBMI2-NEXT: setae %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; 
X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB166_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB166_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movzbl %cl, %edx +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB166_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB166_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movzbl %cl, %edx +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: setae %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB166_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB166_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %sil +; X64-BMI2-NEXT: movzbl %sil, %edx +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %edx, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %add = add i32 %c, 1 + %1 = xor i32 %0, -1 + %2 = lshr i32 %1, %add + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_small_mask_or_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB167_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB167_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; 
X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl $15, %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB167_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB167_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB167_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB167_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB167_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB167_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %0 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask0_or_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl 
{{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB168_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB168_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB168_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB168_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB168_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB168_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB168_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB168_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %c + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 
@atomic_shl1_mask1_or_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB169_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB169_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB169_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB169_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB169_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB169_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB169_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB169_1 
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %1 = and i32 %c, 31 + %2 = xor i32 %0, -1 + %3 = lshr i32 %2, %1 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask01_or_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB170_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB170_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB170_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB170_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB170_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB170_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; 
X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB170_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB170_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %0 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_blsi_or_32_gpr_valz(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_or_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: blsil {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB171_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: orl %edx, %esi +; X86-BMI2-NEXT: lock cmpxchgl %esi, (%ecx) +; X86-BMI2-NEXT: jne .LBB171_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testl %edx, %eax +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB171_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: orl %edx, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB171_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw or ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + %lnot.ext = zext i1 %tobool.not to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_or_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB172_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, 
%edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB172_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB172_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB172_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB172_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB172_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB172_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB172_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %1 = lshr i32 %0, %c + %lnot.ext = and i32 %1, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl2_or_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_or_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB173_1: # %atomicrmw.start 
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: orl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-NOBMI2-NEXT: jne .LBB173_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testl %edx, %eax +; X86-NOBMI2-NEXT: setne %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_or_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $2, %edx +; X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB173_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: orl %edx, %esi +; X86-BMI2-NEXT: lock cmpxchgl %esi, (%ecx) +; X86-BMI2-NEXT: jne .LBB173_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testl %edx, %eax +; X86-BMI2-NEXT: setne %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_or_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB173_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl %edx, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB173_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testl %edx, %eax +; X64-NOBMI2-NEXT: setne %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_or_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB173_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: orl %edx, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB173_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: setne %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool = icmp ne i32 %and, 0 + %lnot.ext = zext i1 %tobool to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_neq_or_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl 
{{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB174_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB174_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movzbl %cl, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: btl %edx, %eax +; X86-NOBMI2-NEXT: setb %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB174_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB174_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB174_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB174_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movzbl %cl, %edx +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: setb %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB174_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB174_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %sil +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %add = add i32 %c, 1 + %1 = lshr i32 %0, %add + %lnot.ext = and i32 %1, 1 + ret i32 
%lnot.ext +} + +define i32 @atomic_shl1_small_mask_or_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB175_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB175_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB175_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB175_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB175_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB175_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB175_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; 
X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB175_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %2 = lshr i32 %1, %0 + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask0_or_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB176_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB176_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB176_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB176_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB176_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB176_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: 
atomic_shl1_mask0_or_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB176_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB176_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %2 = lshr i32 %1, %c + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask1_or_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB177_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB177_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB177_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB177_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB177_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: 
lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB177_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB177_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB177_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %1 = and i32 %c, 31 + %2 = lshr i32 %0, %1 + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask01_or_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB178_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB178_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB178_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB178_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; 
X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB178_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB178_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB178_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB178_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %2 = lshr i32 %1, %0 + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_blsi_or_32_gpr_valnz(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_or_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: .cfi_offset %esi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: blsil {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB179_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %esi +; X86-BMI2-NEXT: orl %edx, %esi +; X86-BMI2-NEXT: lock cmpxchgl %esi, (%ecx) +; X86-BMI2-NEXT: jne .LBB179_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testl %edx, %eax +; X86-BMI2-NEXT: setne %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB179_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: orl %edx, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB179_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: setne %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw or ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool = icmp ne i32 %and3, 0 + %lnot.ext = zext i1 %tobool to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_or_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 
X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB180_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB180_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB180_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB180_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB180_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB180_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB180_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB180_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl2_or_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_or_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $2, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB181_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB181_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %esi, %eax +; X86-NOBMI2-NEXT: je .LBB181_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: jmp .LBB181_5 +; X86-NOBMI2-NEXT: .LBB181_3: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB181_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; 
X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_or_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB181_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB181_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %esi, %eax +; X86-BMI2-NEXT: je .LBB181_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB181_5 +; X86-BMI2-NEXT: .LBB181_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB181_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_or_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB181_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB181_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: testl %edx, %eax +; X64-NOBMI2-NEXT: je .LBB181_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB181_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_or_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB181_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB181_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testl %ecx, %eax +; X64-BMI2-NEXT: je .LBB181_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB181_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 
@atomic_shl1_neq_or_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB182_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB182_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %ecx, %ebx +; X86-NOBMI2-NEXT: incb %bl +; X86-NOBMI2-NEXT: movzbl %bl, %esi +; X86-NOBMI2-NEXT: btl %esi, %eax +; X86-NOBMI2-NEXT: jae .LBB182_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: jmp .LBB182_5 +; X86-NOBMI2-NEXT: .LBB182_3: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB182_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB182_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB182_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl %ecx, %ebx +; X86-BMI2-NEXT: incb %bl +; X86-BMI2-NEXT: movzbl %bl, %esi +; X86-BMI2-NEXT: btl %esi, %eax +; X86-BMI2-NEXT: jae .LBB182_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB182_5 +; X86-BMI2-NEXT: .LBB182_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB182_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB182_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner 
Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB182_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %edx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: jae .LBB182_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB182_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB182_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB182_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rsi), %ecx +; X64-BMI2-NEXT: movzbl %cl, %ecx +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: jae .LBB182_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB182_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %add = add i32 %c, 1 + %shl1 = shl nuw i32 1, %add + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_or_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: lock btsl %ecx, (%eax) +; X86-NOBMI2-NEXT: jae .LBB183_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB183_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: jae .LBB183_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB183_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: lock btsl %esi, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB183_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB183_1: +; X64-NOBMI2-NEXT: movl 
$123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: jae .LBB183_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB183_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask0_or_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB184_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB184_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB184_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB184_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB184_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB184_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB184_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB184_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 
%retval.0 +} + +define i32 @atomic_shl1_mask1_or_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB185_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB185_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB185_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB185_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB185_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB185_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB185_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB185_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_or_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB186_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB186_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB186_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; 
X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB186_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB186_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB186_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB186_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB186_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_blsi_or_32_gpr_br(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_or_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB187_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB187_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %esi, %eax +; X86-BMI2-NEXT: je .LBB187_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB187_5 +; X86-BMI2-NEXT: .LBB187_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB187_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB187_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB187_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testl %ecx, %eax +; X64-BMI2-NEXT: je .LBB187_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: 
retq +; X64-BMI2-NEXT: .LBB187_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw or ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_or_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btsl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB188_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB188_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btsl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB188_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB188_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB188_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB188_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB188_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB188_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl2_or_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_or_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: 
movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB189_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: orl %edi, %edx +; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%esi) +; X86-NOBMI2-NEXT: jne .LBB189_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $123, %edx +; X86-NOBMI2-NEXT: testl %edi, %eax +; X86-NOBMI2-NEXT: jne .LBB189_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movl (%esi,%ecx,4), %edx +; X86-NOBMI2-NEXT: .LBB189_4: # %return +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_or_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %edi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB189_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-BMI2-NEXT: jne .LBB189_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: testl %edi, %eax +; X86-BMI2-NEXT: jne .LBB189_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edx,4), %ecx +; X86-BMI2-NEXT: .LBB189_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_or_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %esi +; X64-NOBMI2-NEXT: shll %cl, %esi +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB189_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %edx +; X64-NOBMI2-NEXT: orl %esi, %edx +; X64-NOBMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB189_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %edx +; X64-NOBMI2-NEXT: testl %esi, %eax +; X64-NOBMI2-NEXT: je .LBB189_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB189_3: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %edx +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_or_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB189_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: orl %edx, %ecx +; 
X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB189_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: je .LBB189_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB189_3: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %ecx +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_or_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB190_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %edx, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%esi) +; X86-NOBMI2-NEXT: jne .LBB190_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %ecx, %edx +; X86-NOBMI2-NEXT: incb %dl +; X86-NOBMI2-NEXT: movzbl %dl, %edi +; X86-NOBMI2-NEXT: movl $123, %edx +; X86-NOBMI2-NEXT: btl %edi, %eax +; X86-NOBMI2-NEXT: jb .LBB190_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movl (%esi,%ecx,4), %edx +; X86-NOBMI2-NEXT: .LBB190_4: # %return +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %ecx +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB190_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %ecx, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%esi) +; X86-BMI2-NEXT: jne .LBB190_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movzbl %cl, %edi +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: btl %edi, %eax +; X86-BMI2-NEXT: jb .LBB190_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edx,4), %ecx +; 
X86-BMI2-NEXT: .LBB190_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB190_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB190_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %esi +; X64-NOBMI2-NEXT: movl $123, %edx +; X64-NOBMI2-NEXT: btl %esi, %eax +; X64-NOBMI2-NEXT: jae .LBB190_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB190_3: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %edx +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB190_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB190_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rsi), %ecx +; X64-BMI2-NEXT: movzbl %cl, %edx +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: btl %edx, %eax +; X64-BMI2-NEXT: jae .LBB190_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB190_3: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %ecx +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %add = add i32 %c, 1 + %shl1 = shl nuw i32 1, %add + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_or_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: andl $15, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB191_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB191_1: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%edx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), 
%ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl $15, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB191_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB191_1: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%edx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: lock btsl %esi, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB191_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB191_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB191_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB191_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask0_or_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btsl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB192_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB192_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btsl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB192_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB192_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB192_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB192_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: 
lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB192_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB192_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask1_or_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btsl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB193_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB193_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btsl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB193_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB193_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB193_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB193_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB193_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB193_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_or_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: 
atomic_shl1_mask01_or_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btsl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB194_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB194_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btsl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB194_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB194_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB194_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB194_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB194_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB194_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_blsi_or_32_gpr_brz(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_or_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: blsil %edx, %edi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB195_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-BMI2-NEXT: jne .LBB195_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: testl %edi, %eax +; X86-BMI2-NEXT: jne .LBB195_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edx,4), %ecx +; 
X86-BMI2-NEXT: .LBB195_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB195_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: orl %edx, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB195_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: je .LBB195_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB195_3: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %ecx +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw or ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_or_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB196_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB196_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB196_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB196_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB196_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB196_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB196_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB196_1: +; 
X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl2_or_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_or_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $2, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB197_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB197_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %esi, %eax +; X86-NOBMI2-NEXT: je .LBB197_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: jmp .LBB197_5 +; X86-NOBMI2-NEXT: .LBB197_3: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB197_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_or_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB197_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB197_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %esi, %eax +; X86-BMI2-NEXT: je .LBB197_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB197_5 +; X86-BMI2-NEXT: .LBB197_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB197_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_or_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB197_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner 
Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB197_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: testl %edx, %eax +; X64-NOBMI2-NEXT: je .LBB197_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB197_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_or_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB197_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB197_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testl %ecx, %eax +; X64-BMI2-NEXT: je .LBB197_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB197_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_or_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB198_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: orl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB198_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %ecx, %ebx +; X86-NOBMI2-NEXT: incb %bl +; X86-NOBMI2-NEXT: movzbl %bl, %esi +; X86-NOBMI2-NEXT: btl %esi, %eax +; X86-NOBMI2-NEXT: jae .LBB198_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: jmp .LBB198_5 +; X86-NOBMI2-NEXT: .LBB198_3: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB198_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: 
atomic_shl1_neq_or_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB198_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB198_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl %ecx, %ebx +; X86-BMI2-NEXT: incb %bl +; X86-BMI2-NEXT: movzbl %bl, %esi +; X86-BMI2-NEXT: btl %esi, %eax +; X86-BMI2-NEXT: jae .LBB198_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB198_5 +; X86-BMI2-NEXT: .LBB198_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB198_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB198_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: orl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB198_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %edx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: jae .LBB198_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB198_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB198_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB198_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rsi), %ecx +; X64-BMI2-NEXT: movzbl %cl, %ecx +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: jae .LBB198_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB198_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %add = add i32 %c, 1 + %shl1 = shl nuw i32 1, 
%add + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_or_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: lock btsl %ecx, (%eax) +; X86-NOBMI2-NEXT: jae .LBB199_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB199_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: jae .LBB199_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB199_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: lock btsl %esi, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB199_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB199_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: jae .LBB199_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB199_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %1 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask0_or_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB200_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB200_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry 
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB200_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB200_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB200_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB200_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB200_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB200_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask1_or_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB201_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB201_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB201_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB201_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB201_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB201_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brnz: +; 
X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB201_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB201_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_or_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB202_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB202_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB202_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB202_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB202_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB202_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB202_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB202_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %0 = atomicrmw or ptr %v, i32 %shl monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 
@atomic_blsi_or_32_gpr_brnz(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_or_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB203_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: orl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB203_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %esi, %eax +; X86-BMI2-NEXT: je .LBB203_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB203_5 +; X86-BMI2-NEXT: .LBB203_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB203_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB203_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %edx +; X64-BMI2-NEXT: orl %ecx, %edx +; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) +; X64-BMI2-NEXT: jne .LBB203_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testl %ecx, %eax +; X64-BMI2-NEXT: je .LBB203_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB203_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %0 = atomicrmw or ptr %v, i32 %and monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_and_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btrl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btrl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, 
%ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %0, %shl + ret i32 %and +} + +define i32 @atomic_shl2_and_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_and_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: notl %ecx +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB205_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %ecx, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%esi) +; X86-NOBMI2-NEXT: jne .LBB205_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_and_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $2, %edx +; X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movl %edx, %esi +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB205_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB205_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_and_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl %edx, %ecx +; X64-NOBMI2-NEXT: notl %ecx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB205_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %ecx, %esi +; 
X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB205_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_and_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl %ecx, %edx +; X64-BMI2-NEXT: notl %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB205_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB205_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %0, %shl + ret i32 %and +} + +define i32 @atomic_shl1_neq_and_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB206_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB206_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB206_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB206_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movl $1, %edx +; X86-BMI2-NEXT: shlxl %ecx, %edx, %ecx +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; 
X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB206_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB206_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB206_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB206_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %cl +; X64-BMI2-NEXT: movl $1, %edx +; X64-BMI2-NEXT: shlxl %ecx, %edx, %ecx +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %add = add i32 %c, 1 + %shl1 = shl nuw i32 1, %add + %and = and i32 %0, %shl1 + ret i32 %and +} + +define i32 @atomic_shl1_small_mask_and_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btrl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btrl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %not = xor i32 %shl, -1 + %1 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %1, %shl + ret i32 %and +} + +define i32 @atomic_shl1_mask0_and_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btrl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btrl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %not = xor i32 %shl, -1 + %1 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %1, %shl1 + ret i32 %and +} + +define i32 @atomic_shl1_mask1_and_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btrl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btrl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %1 = and i32 %c, 31 + %shl1 = shl nuw i32 1, %1 + %and = and i32 %0, %shl1 + ret i32 %and +} + +define i32 @atomic_shl1_mask01_and_32_gpr_val(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_32_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: 
movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btrl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btrl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_32_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %not = xor i32 %shl, -1 + %1 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %1, %shl + ret i32 %and +} + +define i32 @atomic_blsi_and_32_gpr_val(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_and_32_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB211_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB211_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_32_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %ecx +; X64-BMI2-NEXT: movl %ecx, %edx +; X64-BMI2-NEXT: notl %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB211_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB211_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %not = xor i32 %and, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and3 = and i32 %0, %and + ret i32 %and3 +} + +define i32 @atomic_shl1_and_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi 
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB212_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB212_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB212_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB212_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: btl %ecx, %eax +; X86-BMI2-NEXT: setae %dl +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB212_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB212_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB212_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB212_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %edx, %edx +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: setae %dl +; X64-BMI2-NEXT: movl 
%edx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %1 = xor i32 %0, -1 + %2 = lshr i32 %1, %c + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl2_and_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_and_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: notl %ecx +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB213_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %ecx, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%esi) +; X86-NOBMI2-NEXT: jne .LBB213_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testl %edx, %eax +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_and_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $2, %edx +; X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movl %edx, %esi +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB213_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB213_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testl %edx, %eax +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_and_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl %edx, %ecx +; X64-NOBMI2-NEXT: notl %ecx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB213_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %ecx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB213_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testl %edx, %eax +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movl %ecx, 
%eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_and_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl %edx, %ecx +; X64-BMI2-NEXT: notl %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB213_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %ecx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB213_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + %lnot.ext = zext i1 %tobool.not to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_neq_and_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB214_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB214_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movzbl %cl, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: btl %edx, %eax +; X86-NOBMI2-NEXT: setae %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB214_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB214_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movzbl %cl, %edx +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_32_gpr_valz: +; X64-NOBMI2: # 
%bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB214_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB214_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movzbl %cl, %edx +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: setae %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB214_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB214_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %cl +; X64-BMI2-NEXT: movzbl %cl, %edx +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %edx, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %add = add i32 %c, 1 + %1 = xor i32 %0, -1 + %2 = lshr i32 %1, %add + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_small_mask_and_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB215_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB215_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; 
X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB215_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB215_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: btl %ecx, %eax +; X86-BMI2-NEXT: setae %dl +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB215_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB215_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: andl $15, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB215_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB215_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %edx, %edx +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: setae %dl +; X64-BMI2-NEXT: movl %edx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %not = xor i32 %shl, -1 + %1 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %0 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask0_and_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB216_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB216_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB216_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB216_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: btl %ecx, %eax +; X86-BMI2-NEXT: setae %dl +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB216_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB216_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB216_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB216_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %edx, %edx +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: setae %dl +; X64-BMI2-NEXT: movl %edx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %not = xor i32 %shl, -1 + %1 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %c + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask1_and_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB217_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # 
=>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB217_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setae %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB217_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB217_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: btl %ecx, %eax +; X86-BMI2-NEXT: setae %dl +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB217_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB217_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB217_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB217_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %edx, %edx +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: setae %dl +; X64-BMI2-NEXT: movl %edx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %1 = and i32 %c, 31 + %2 = xor i32 %0, -1 + %3 = lshr i32 %2, %1 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask01_and_32_gpr_valz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_32_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %ecx, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: movl $-2, %edi +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: roll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB218_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: andl %edi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-NOBMI2-NEXT: jne .LBB218_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: btl %edx, %eax +; X86-NOBMI2-NEXT: setae %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %ecx, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: movl $-2, %edi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %edi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB218_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-BMI2-NEXT: jne .LBB218_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_32_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl %esi, %edx +; X64-NOBMI2-NEXT: andl $31, %edx +; X64-NOBMI2-NEXT: movl $-2, %esi +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: roll %cl, %esi +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB218_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl %esi, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB218_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: setae %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl %esi, %edx +; X64-BMI2-NEXT: andl $31, %edx +; X64-BMI2-NEXT: movl $-2, %esi +; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; 
X64-BMI2-NEXT: roll %cl, %esi +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB218_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl %esi, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB218_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %edx, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %not = xor i32 %shl, -1 + %1 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %2 = xor i32 %1, -1 + %3 = lshr i32 %2, %0 + %lnot.ext = and i32 %3, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_blsi_and_32_gpr_valz(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_and_32_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: blsil {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %edx, %esi +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB219_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB219_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testl %edx, %eax +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_32_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %edx +; X64-BMI2-NEXT: movl %edx, %ecx +; X64-BMI2-NEXT: notl %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB219_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %ecx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB219_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %not = xor i32 %and, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + %lnot.ext = zext i1 %tobool.not to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_and_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB220_1: 
# %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB220_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB220_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB220_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB220_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB220_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB220_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB220_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %1 = lshr i32 %0, %c + %lnot.ext = and i32 %1, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl2_and_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_and_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: 
.cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edx +; X86-NOBMI2-NEXT: shll %cl, %edx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: notl %ecx +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB221_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %ecx, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%esi) +; X86-NOBMI2-NEXT: jne .LBB221_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testl %edx, %eax +; X86-NOBMI2-NEXT: setne %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_and_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $2, %edx +; X86-BMI2-NEXT: shlxl %eax, %edx, %edx +; X86-BMI2-NEXT: movl %edx, %esi +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB221_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB221_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testl %edx, %eax +; X86-BMI2-NEXT: setne %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_and_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl %edx, %ecx +; X64-NOBMI2-NEXT: notl %ecx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB221_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %ecx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB221_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testl %edx, %eax +; X64-NOBMI2-NEXT: setne %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_and_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl %edx, %ecx +; X64-BMI2-NEXT: notl %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB221_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %ecx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB221_1 +; X64-BMI2-NEXT: # %bb.2: # 
%atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: setne %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %0, %shl + %tobool = icmp ne i32 %and, 0 + %lnot.ext = zext i1 %tobool to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_neq_and_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB222_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB222_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movzbl %cl, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: btl %edx, %eax +; X86-NOBMI2-NEXT: setb %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB222_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB222_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB222_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB222_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movzbl %cl, %edx +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: setb %cl +; X64-NOBMI2-NEXT: movl 
%ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB222_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB222_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %cl +; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %add = add i32 %c, 1 + %1 = lshr i32 %0, %add + %lnot.ext = and i32 %1, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_small_mask_and_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB223_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB223_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB223_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB223_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; 
X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB223_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB223_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: andl $15, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB223_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB223_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %not = xor i32 %shl, -1 + %1 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %2 = lshr i32 %1, %0 + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask0_and_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB224_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB224_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB224_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB224_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; 
X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB224_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB224_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB224_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB224_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %not = xor i32 %shl, -1 + %1 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %2 = lshr i32 %1, %c + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask1_and_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB225_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB225_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: btl %ecx, %eax +; X86-NOBMI2-NEXT: setb %dl +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: 
.p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB225_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB225_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB225_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB225_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB225_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB225_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %1 = and i32 %c, 31 + %2 = lshr i32 %0, %1 + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_mask01_and_32_gpr_valnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_32_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %ecx, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: movl $-2, %edi +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: roll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB226_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: andl %edi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-NOBMI2-NEXT: jne .LBB226_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: btl %edx, %eax +; X86-NOBMI2-NEXT: setb %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: 
atomic_shl1_mask01_and_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %ecx, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: movl $-2, %edi +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %edi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB226_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%esi) +; X86-BMI2-NEXT: jne .LBB226_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %edx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_32_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl %esi, %edx +; X64-NOBMI2-NEXT: andl $31, %edx +; X64-NOBMI2-NEXT: movl $-2, %esi +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: roll %cl, %esi +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB226_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl %esi, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB226_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: setb %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl %esi, %edx +; X64-BMI2-NEXT: andl $31, %edx +; X64-BMI2-NEXT: movl $-2, %esi +; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI2-NEXT: roll %cl, %esi +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB226_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl %esi, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB226_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxl %edx, %eax, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 31 + %shl = shl nuw i32 1, %0 + %not = xor i32 %shl, -1 + %1 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %2 = lshr i32 %1, %0 + %lnot.ext = and i32 %2, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_blsi_and_32_gpr_valnz(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_and_32_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: blsil {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %edx, %esi +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: 
movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB227_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) +; X86-BMI2-NEXT: jne .LBB227_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testl %edx, %eax +; X86-BMI2-NEXT: setne %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_32_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %edx +; X64-BMI2-NEXT: movl %edx, %ecx +; X64-BMI2-NEXT: notl %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB227_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %ecx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB227_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: setne %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %not = xor i32 %and, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and3 = and i32 %0, %and + %tobool = icmp ne i32 %and3, 0 + %lnot.ext = zext i1 %tobool to i32 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_and_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btrl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB228_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB228_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btrl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB228_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB228_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btrl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB228_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB228_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btrl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB228_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB228_1: +; X64-BMI2-NEXT: 
movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl2_and_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_and_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $2, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB229_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: lock cmpxchgl %ebx, (%edx) +; X86-NOBMI2-NEXT: jne .LBB229_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %esi, %eax +; X86-NOBMI2-NEXT: je .LBB229_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: jmp .LBB229_5 +; X86-NOBMI2-NEXT: .LBB229_3: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB229_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_and_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB229_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %edi, %ebx +; X86-BMI2-NEXT: lock cmpxchgl %ebx, (%edx) +; X86-BMI2-NEXT: jne .LBB229_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %esi, %eax +; X86-BMI2-NEXT: je .LBB229_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB229_5 +; X86-BMI2-NEXT: .LBB229_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB229_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 
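+; Note: unlike the shl1 variants, 2 << %c wraps to zero when %c == 31, so this is not a provable single-bit test; the lowering keeps the cmpxchg loop above rather than folding to lock btrl.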
+; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_and_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl %edx, %esi +; X64-NOBMI2-NEXT: notl %esi +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB229_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %r8d +; X64-NOBMI2-NEXT: andl %esi, %r8d +; X64-NOBMI2-NEXT: lock cmpxchgl %r8d, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB229_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: testl %edx, %eax +; X64-NOBMI2-NEXT: je .LBB229_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB229_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_and_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl %ecx, %edx +; X64-BMI2-NEXT: notl %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB229_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %r8d +; X64-BMI2-NEXT: andl %edx, %r8d +; X64-BMI2-NEXT: lock cmpxchgl %r8d, (%rdi) +; X64-BMI2-NEXT: jne .LBB229_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testl %ecx, %eax +; X64-BMI2-NEXT: je .LBB229_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB229_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_and_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB230_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB230_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; 
X86-NOBMI2-NEXT: movl %ecx, %ebx +; X86-NOBMI2-NEXT: incb %bl +; X86-NOBMI2-NEXT: movzbl %bl, %esi +; X86-NOBMI2-NEXT: btl %esi, %eax +; X86-NOBMI2-NEXT: jae .LBB230_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: jmp .LBB230_5 +; X86-NOBMI2-NEXT: .LBB230_3: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB230_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB230_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB230_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl %ecx, %ebx +; X86-BMI2-NEXT: incb %bl +; X86-BMI2-NEXT: movzbl %bl, %esi +; X86-BMI2-NEXT: btl %esi, %eax +; X86-BMI2-NEXT: jae .LBB230_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB230_5 +; X86-BMI2-NEXT: .LBB230_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB230_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB230_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB230_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %edx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: jae .LBB230_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB230_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB230_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi 
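+; Note: each loop iteration computes old & mask into the scratch register and tries to publish it; on exit %eax still holds the loaded old value, which is re-tested below with btl because the tested bit (%c + 1) differs from the cleared bit (%c).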
+; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB230_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rcx), %edx +; X64-BMI2-NEXT: movzbl %dl, %edx +; X64-BMI2-NEXT: btl %edx, %eax +; X64-BMI2-NEXT: jae .LBB230_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB230_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %add = add i32 %c, 1 + %shl1 = shl nuw i32 1, %add + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_and_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: lock btrl %ecx, (%eax) +; X86-NOBMI2-NEXT: jae .LBB231_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB231_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: lock btrl %ecx, (%eax) +; X86-BMI2-NEXT: jae .LBB231_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB231_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: lock btrl %esi, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB231_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB231_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: lock btrl %esi, (%rdi) +; X64-BMI2-NEXT: jae .LBB231_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB231_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %not = xor i32 %shl, -1 + %1 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 
@atomic_shl1_mask0_and_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btrl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB232_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB232_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btrl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB232_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB232_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btrl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB232_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB232_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btrl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB232_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB232_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask1_and_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btrl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB233_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB233_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btrl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB233_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; 
X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB233_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btrl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB233_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB233_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btrl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB233_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB233_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_and_32_gpr_br(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_32_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btrl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB234_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB234_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btrl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB234_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB234_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_32_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btrl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB234_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB234_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btrl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB234_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; 
X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB234_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_blsi_and_32_gpr_br(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_and_32_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB235_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %edi, %ebx +; X86-BMI2-NEXT: lock cmpxchgl %ebx, (%edx) +; X86-BMI2-NEXT: jne .LBB235_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %esi, %eax +; X86-BMI2-NEXT: je .LBB235_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB235_5 +; X86-BMI2-NEXT: .LBB235_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB235_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_32_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %ecx +; X64-BMI2-NEXT: movl %ecx, %edx +; X64-BMI2-NEXT: notl %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB235_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %r8d +; X64-BMI2-NEXT: andl %edx, %r8d +; X64-BMI2-NEXT: lock cmpxchgl %r8d, (%rdi) +; X64-BMI2-NEXT: jne .LBB235_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testl %ecx, %eax +; X64-BMI2-NEXT: je .LBB235_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB235_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %not = xor i32 %and, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ 
%1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_and_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btrl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB236_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB236_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btrl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB236_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB236_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btrl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB236_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB236_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btrl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB236_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB236_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl2_and_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_and_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl %edi, %edx +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB237_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: 
movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edx, %ebx +; X86-NOBMI2-NEXT: lock cmpxchgl %ebx, (%esi) +; X86-NOBMI2-NEXT: jne .LBB237_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $123, %edx +; X86-NOBMI2-NEXT: testl %edi, %eax +; X86-NOBMI2-NEXT: jne .LBB237_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movl (%esi,%ecx,4), %edx +; X86-NOBMI2-NEXT: .LBB237_4: # %return +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_and_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: shlxl %edx, %eax, %edi +; X86-BMI2-NEXT: movl %edi, %ecx +; X86-BMI2-NEXT: notl %ecx +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB237_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ecx, %ebx +; X86-BMI2-NEXT: lock cmpxchgl %ebx, (%esi) +; X86-BMI2-NEXT: jne .LBB237_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: testl %edi, %eax +; X86-BMI2-NEXT: jne .LBB237_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edx,4), %ecx +; X86-BMI2-NEXT: .LBB237_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_and_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %esi +; X64-NOBMI2-NEXT: shll %cl, %esi +; X64-NOBMI2-NEXT: movl %esi, %edx +; X64-NOBMI2-NEXT: notl %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB237_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %r8d +; X64-NOBMI2-NEXT: andl %edx, %r8d +; X64-NOBMI2-NEXT: lock cmpxchgl %r8d, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB237_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %edx +; X64-NOBMI2-NEXT: testl %esi, %eax +; X64-NOBMI2-NEXT: je .LBB237_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB237_3: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %edx +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_and_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %edx +; X64-BMI2-NEXT: movl %edx, %ecx +; X64-BMI2-NEXT: notl %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB237_1: # %atomicrmw.start +; X64-BMI2-NEXT: # 
=>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %r8d +; X64-BMI2-NEXT: andl %ecx, %r8d +; X64-BMI2-NEXT: lock cmpxchgl %r8d, (%rdi) +; X64-BMI2-NEXT: jne .LBB237_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: je .LBB237_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB237_3: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %ecx +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_and_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $-2, %edx +; X86-NOBMI2-NEXT: roll %cl, %edx +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB238_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %edx, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%esi) +; X86-NOBMI2-NEXT: jne .LBB238_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %ecx, %edx +; X86-NOBMI2-NEXT: incb %dl +; X86-NOBMI2-NEXT: movzbl %dl, %edi +; X86-NOBMI2-NEXT: movl $123, %edx +; X86-NOBMI2-NEXT: btl %edi, %eax +; X86-NOBMI2-NEXT: jb .LBB238_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movl (%esi,%ecx,4), %edx +; X86-NOBMI2-NEXT: .LBB238_4: # %return +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %edi, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $-2, %edx +; X86-BMI2-NEXT: roll %cl, %edx +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB238_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %edx, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%esi) +; X86-BMI2-NEXT: jne .LBB238_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl %ecx, %edx +; X86-BMI2-NEXT: incb %dl +; X86-BMI2-NEXT: movzbl %dl, %edi +; X86-BMI2-NEXT: movl $123, %edx +; X86-BMI2-NEXT: 
btl %edi, %eax +; X86-BMI2-NEXT: jb .LBB238_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl (%esi,%ecx,4), %edx +; X86-BMI2-NEXT: .LBB238_4: # %return +; X86-BMI2-NEXT: movl %edx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB238_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB238_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %esi +; X64-NOBMI2-NEXT: movl $123, %edx +; X64-NOBMI2-NEXT: btl %esi, %eax +; X64-NOBMI2-NEXT: jae .LBB238_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB238_3: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %edx +; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB238_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB238_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rcx), %edx +; X64-BMI2-NEXT: movzbl %dl, %esi +; X64-BMI2-NEXT: movl $123, %edx +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: jae .LBB238_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movl %edx, %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB238_3: # %if.then +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %edx +; X64-BMI2-NEXT: movl %edx, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %add = add i32 %c, 1 + %shl1 = shl nuw i32 1, %add + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_and_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: andl $15, %edx +; X86-NOBMI2-NEXT: lock btrl %edx, (%ecx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB239_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB239_1: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%edx,4), %eax +; X86-NOBMI2-NEXT: retl 
+; +; X86-BMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl $15, %edx +; X86-BMI2-NEXT: lock btrl %edx, (%ecx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB239_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB239_1: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%edx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: lock btrl %esi, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB239_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB239_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: lock btrl %esi, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB239_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB239_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %not = xor i32 %shl, -1 + %1 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask0_and_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btrl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB240_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB240_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btrl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB240_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB240_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btrl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB240_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB240_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; 
X64-BMI2-LABEL: atomic_shl1_mask0_and_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btrl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB240_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB240_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask1_and_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btrl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB241_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB241_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btrl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB241_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB241_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btrl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB241_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB241_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btrl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB241_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB241_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 
4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_and_32_gpr_brz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_32_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btrl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB242_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB242_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btrl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB242_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB242_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_32_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btrl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB242_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB242_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btrl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB242_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB242_1: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_blsi_and_32_gpr_brz(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_and_32_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: blsil %edx, %edi +; X86-BMI2-NEXT: movl %edi, %ecx +; X86-BMI2-NEXT: notl %ecx +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB243_1: # %atomicrmw.start +; 
X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ecx, %ebx +; X86-BMI2-NEXT: lock cmpxchgl %ebx, (%esi) +; X86-BMI2-NEXT: jne .LBB243_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: testl %edi, %eax +; X86-BMI2-NEXT: jne .LBB243_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edx,4), %ecx +; X86-BMI2-NEXT: .LBB243_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %edx +; X64-BMI2-NEXT: movl %edx, %ecx +; X64-BMI2-NEXT: notl %ecx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB243_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %r8d +; X64-BMI2-NEXT: andl %ecx, %r8d +; X64-BMI2-NEXT: lock cmpxchgl %r8d, (%rdi) +; X64-BMI2-NEXT: jne .LBB243_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testl %edx, %eax +; X64-BMI2-NEXT: je .LBB243_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB243_3: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %ecx +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %not = xor i32 %and, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_and_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btrl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB244_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB244_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btrl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB244_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB244_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btrl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB244_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then 
+; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB244_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btrl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB244_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB244_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl2_and_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_and_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $2, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB245_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: lock cmpxchgl %ebx, (%edx) +; X86-NOBMI2-NEXT: jne .LBB245_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %esi, %eax +; X86-NOBMI2-NEXT: je .LBB245_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: jmp .LBB245_5 +; X86-NOBMI2-NEXT: .LBB245_3: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB245_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_and_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB245_1: # 
%atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %edi, %ebx +; X86-BMI2-NEXT: lock cmpxchgl %ebx, (%edx) +; X86-BMI2-NEXT: jne .LBB245_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %esi, %eax +; X86-BMI2-NEXT: je .LBB245_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB245_5 +; X86-BMI2-NEXT: .LBB245_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB245_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_and_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: shll %cl, %edx +; X64-NOBMI2-NEXT: movl %edx, %esi +; X64-NOBMI2-NEXT: notl %esi +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB245_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %r8d +; X64-NOBMI2-NEXT: andl %esi, %r8d +; X64-NOBMI2-NEXT: lock cmpxchgl %r8d, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB245_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: testl %edx, %eax +; X64-NOBMI2-NEXT: je .LBB245_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB245_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_and_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx +; X64-BMI2-NEXT: movl %ecx, %edx +; X64-BMI2-NEXT: notl %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB245_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %r8d +; X64-BMI2-NEXT: andl %edx, %r8d +; X64-BMI2-NEXT: lock cmpxchgl %r8d, (%rdi) +; X64-BMI2-NEXT: jne .LBB245_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testl %ecx, %eax +; X64-BMI2-NEXT: je .LBB245_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB245_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i32 2, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_and_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: 
.cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl $-2, %esi +; X86-NOBMI2-NEXT: roll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB246_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edi +; X86-NOBMI2-NEXT: andl %esi, %edi +; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-NOBMI2-NEXT: jne .LBB246_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %ecx, %ebx +; X86-NOBMI2-NEXT: incb %bl +; X86-NOBMI2-NEXT: movzbl %bl, %esi +; X86-NOBMI2-NEXT: btl %esi, %eax +; X86-NOBMI2-NEXT: jae .LBB246_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-NOBMI2-NEXT: jmp .LBB246_5 +; X86-NOBMI2-NEXT: .LBB246_3: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB246_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl $-2, %esi +; X86-BMI2-NEXT: roll %cl, %esi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB246_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edi +; X86-BMI2-NEXT: andl %esi, %edi +; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) +; X86-BMI2-NEXT: jne .LBB246_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl %ecx, %ebx +; X86-BMI2-NEXT: incb %bl +; X86-BMI2-NEXT: movzbl %bl, %esi +; X86-BMI2-NEXT: btl %esi, %eax +; X86-BMI2-NEXT: jae .LBB246_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB246_5 +; X86-BMI2-NEXT: .LBB246_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB246_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: movl $-2, %edx +; X64-NOBMI2-NEXT: roll %cl, %edx +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB246_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %esi +; X64-NOBMI2-NEXT: andl %edx, %esi +; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB246_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %edx +; X64-NOBMI2-NEXT: btl %edx, %eax +; X64-NOBMI2-NEXT: jae .LBB246_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; 
X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB246_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %ecx +; X64-BMI2-NEXT: movl $-2, %edx +; X64-BMI2-NEXT: roll %cl, %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB246_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %esi +; X64-BMI2-NEXT: andl %edx, %esi +; X64-BMI2-NEXT: lock cmpxchgl %esi, (%rdi) +; X64-BMI2-NEXT: jne .LBB246_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rcx), %edx +; X64-BMI2-NEXT: movzbl %dl, %edx +; X64-BMI2-NEXT: btl %edx, %eax +; X64-BMI2-NEXT: jae .LBB246_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB246_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %add = add i32 %c, 1 + %shl1 = shl nuw i32 1, %add + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_small_mask_and_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: lock btrl %ecx, (%eax) +; X86-NOBMI2-NEXT: jae .LBB247_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB247_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: lock btrl %ecx, (%eax) +; X86-BMI2-NEXT: jae .LBB247_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB247_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: lock btrl %esi, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB247_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB247_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_and_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: lock btrl %esi, (%rdi) +; X64-BMI2-NEXT: jae .LBB247_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: 
.LBB247_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = and i32 %c, 15 + %shl = shl nuw nsw i32 1, %0 + %not = xor i32 %shl, -1 + %1 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %1, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv2 = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv2 + %2 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %2, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask0_and_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btrl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB248_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB248_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btrl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB248_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB248_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btrl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB248_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB248_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btrl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB248_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB248_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %shl1 = shl nuw i32 1, %c + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask1_and_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: 
andl $31, %edx +; X86-NOBMI2-NEXT: lock btrl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB249_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB249_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btrl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB249_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB249_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btrl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB249_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB249_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btrl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB249_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB249_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i32 1, %c + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %rem = and i32 %c, 31 + %shl1 = shl nuw i32 1, %rem + %and = and i32 %0, %shl1 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_mask01_and_32_gpr_brnz(ptr %v, i32 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_32_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btrl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB250_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB250_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btrl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB250_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB250_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_32_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, 
%eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btrl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB250_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB250_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btrl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB250_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB250_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i32 %c, 31 + %shl = shl nuw i32 1, %rem + %not = xor i32 %shl, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and = and i32 %0, %shl + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %conv = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %conv + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_blsi_and_32_gpr_brnz(ptr %v, i32 %c) { +; X86-BMI2-LABEL: atomic_blsi_and_32_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: blsil %ecx, %esi +; X86-BMI2-NEXT: movl %esi, %edi +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB251_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %edi, %ebx +; X86-BMI2-NEXT: lock cmpxchgl %ebx, (%edx) +; X86-BMI2-NEXT: jne .LBB251_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %esi, %eax +; X86-BMI2-NEXT: je .LBB251_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax +; X86-BMI2-NEXT: jmp .LBB251_5 +; X86-BMI2-NEXT: .LBB251_3: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB251_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_32_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsil %esi, %ecx +; X64-BMI2-NEXT: movl %ecx, %edx +; X64-BMI2-NEXT: notl %edx +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB251_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %r8d +; X64-BMI2-NEXT: andl %edx, %r8d +; X64-BMI2-NEXT: lock cmpxchgl %r8d, (%rdi) +; X64-BMI2-NEXT: jne .LBB251_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testl %ecx, %eax +; X64-BMI2-NEXT: je .LBB251_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl 
%esi, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB251_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i32 0, %c + %and = and i32 %sub, %c + %not = xor i32 %and, -1 + %0 = atomicrmw and ptr %v, i32 %not monotonic, align 4 + %and3 = and i32 %0, %and + %tobool.not = icmp eq i32 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %idxprom = zext i32 %c to i64 + %arrayidx = getelementptr inbounds i32, ptr %v, i64 %idxprom + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_xor_32_const_val(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_32_const_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btcl $4, (%ecx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: shll $4, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_32_const_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: lock btcl $4, (%ecx) +; X86-BMI2-NEXT: setb %al +; X86-BMI2-NEXT: shll $4, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_32_const_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcl $4, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: shll $4, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_32_const_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcl $4, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shll $4, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = and i32 %0, 16 + ret i32 %and +} + +define i32 @atomic_shl1_neq_xor_32_const_val(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_32_const_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB253_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: xorl $16, %edx +; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NOBMI2-NEXT: jne .LBB253_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $32, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_32_const_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB253_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl $16, %edx +; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-BMI2-NEXT: jne .LBB253_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl $32, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_32_const_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB253_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB253_1 
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andl $32, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_32_const_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB253_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl $16, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB253_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl $32, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = and i32 %0, 32 + ret i32 %and +} + +define i32 @atomic_shl1_xor_32_const_valz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_32_const_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB254_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: xorl $16, %edx +; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NOBMI2-NEXT: jne .LBB254_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testb $16, %al +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_32_const_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB254_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl $16, %edx +; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-BMI2-NEXT: jne .LBB254_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $16, %al +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_32_const_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB254_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB254_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testb $16, %al +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_32_const_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB254_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl $16, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB254_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testb $16, %al +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = lshr i32 %0, 4 + %and.lobit = and i32 %and, 1 + %lnot.ext = xor i32 %and.lobit, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_neq_xor_32_const_valz(ptr %v) { +; 
X86-NOBMI2-LABEL: atomic_shl1_neq_xor_32_const_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB255_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: xorl $16, %edx +; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NOBMI2-NEXT: jne .LBB255_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_32_const_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB255_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl $16, %edx +; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-BMI2-NEXT: jne .LBB255_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_32_const_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB255_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB255_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_32_const_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB255_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl $16, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB255_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = lshr i32 %0, 5 + %and.lobit = and i32 %and, 1 + %lnot.ext = xor i32 %and.lobit, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_xor_32_const_valnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_32_const_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB256_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: xorl $16, %edx +; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NOBMI2-NEXT: jne .LBB256_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: shrl $4, %eax +; X86-NOBMI2-NEXT: andl $1, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_32_const_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%ecx), 
%eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB256_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl $16, %edx +; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-BMI2-NEXT: jne .LBB256_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrl $4, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_32_const_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB256_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB256_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: shrl $4, %eax +; X64-NOBMI2-NEXT: andl $1, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_32_const_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB256_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl $16, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB256_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrl $4, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = lshr i32 %0, 4 + %and.lobit = and i32 %and, 1 + ret i32 %and.lobit +} + +define i32 @atomic_shl1_neq_xor_32_const_valnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_32_const_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB257_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: xorl $16, %edx +; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NOBMI2-NEXT: jne .LBB257_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: shrl $5, %eax +; X86-NOBMI2-NEXT: andl $1, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_32_const_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB257_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl $16, %edx +; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-BMI2-NEXT: jne .LBB257_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrl $5, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_32_const_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB257_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB257_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: shrl $5, %eax +; X64-NOBMI2-NEXT: andl $1, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_32_const_valnz: +; X64-BMI2: # %bb.0: # 
%entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB257_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl $16, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB257_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrl $5, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = lshr i32 %0, 5 + %and.lobit = and i32 %and, 1 + ret i32 %and.lobit +} + +define i32 @atomic_shl1_xor_32_const_br(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_32_const_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: lock btcl $4, (%eax) +; X86-NOBMI2-NEXT: jae .LBB258_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl 16(%eax), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB258_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_32_const_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: lock btcl $4, (%eax) +; X86-BMI2-NEXT: jae .LBB258_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl 16(%eax), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB258_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_32_const_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btcl $4, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB258_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl 16(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB258_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_32_const_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btcl $4, (%rdi) +; X64-BMI2-NEXT: jae .LBB258_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl 16(%rdi), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB258_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = and i32 %0, 16 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_xor_32_const_br(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_32_const_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB259_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl $16, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-NOBMI2-NEXT: jne .LBB259_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: je .LBB259_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movl 16(%edx), %ecx +; X86-NOBMI2-NEXT: .LBB259_4: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_32_const_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), 
%edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB259_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: xorl $16, %ecx +; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-BMI2-NEXT: jne .LBB259_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB259_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl 16(%edx), %ecx +; X86-BMI2-NEXT: .LBB259_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_32_const_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB259_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB259_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: je .LBB259_4 +; X64-NOBMI2-NEXT: # %bb.3: # %if.then +; X64-NOBMI2-NEXT: movl 16(%rdi), %ecx +; X64-NOBMI2-NEXT: .LBB259_4: # %return +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_32_const_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB259_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl $16, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB259_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: je .LBB259_4 +; X64-BMI2-NEXT: # %bb.3: # %if.then +; X64-BMI2-NEXT: movl 16(%rdi), %ecx +; X64-BMI2-NEXT: .LBB259_4: # %return +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = and i32 %0, 32 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_xor_32_const_brz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_32_const_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: lock btcl $4, (%ecx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB260_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB260_1: # %if.then +; X86-NOBMI2-NEXT: movl 16(%ecx), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_32_const_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: lock btcl $4, (%ecx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB260_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB260_1: # %if.then +; X86-BMI2-NEXT: movl 16(%ecx), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_32_const_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btcl $4, (%rdi) +; X64-NOBMI2-NEXT: 
movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB260_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB260_1: # %if.then +; X64-NOBMI2-NEXT: movl 16(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_32_const_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btcl $4, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB260_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB260_1: # %if.then +; X64-BMI2-NEXT: movl 16(%rdi), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = and i32 %0, 16 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_xor_32_const_brz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_32_const_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB261_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl $16, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-NOBMI2-NEXT: jne .LBB261_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: jne .LBB261_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movl 16(%edx), %ecx +; X86-NOBMI2-NEXT: .LBB261_4: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_32_const_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB261_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: xorl $16, %ecx +; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-BMI2-NEXT: jne .LBB261_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB261_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl 16(%edx), %ecx +; X86-BMI2-NEXT: .LBB261_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_32_const_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB261_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB261_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: jne .LBB261_4 +; X64-NOBMI2-NEXT: # %bb.3: # %if.then +; X64-NOBMI2-NEXT: movl 16(%rdi), %ecx +; X64-NOBMI2-NEXT: .LBB261_4: # %return +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_32_const_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; 
X64-BMI2-NEXT: .LBB261_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl $16, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB261_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: jne .LBB261_4 +; X64-BMI2-NEXT: # %bb.3: # %if.then +; X64-BMI2-NEXT: movl 16(%rdi), %ecx +; X64-BMI2-NEXT: .LBB261_4: # %return +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = and i32 %0, 32 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_xor_32_const_brnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_32_const_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: lock btcl $4, (%eax) +; X86-NOBMI2-NEXT: jae .LBB262_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl 16(%eax), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB262_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_32_const_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: lock btcl $4, (%eax) +; X86-BMI2-NEXT: jae .LBB262_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl 16(%eax), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB262_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_32_const_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btcl $4, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB262_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl 16(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB262_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_32_const_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btcl $4, (%rdi) +; X64-BMI2-NEXT: jae .LBB262_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl 16(%rdi), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB262_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = and i32 %0, 16 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_xor_32_const_brnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_32_const_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB263_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: xorl $16, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-NOBMI2-NEXT: jne .LBB263_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; 
X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: je .LBB263_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movl 16(%edx), %ecx +; X86-NOBMI2-NEXT: .LBB263_4: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_32_const_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB263_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: xorl $16, %ecx +; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-BMI2-NEXT: jne .LBB263_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB263_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl 16(%edx), %ecx +; X86-BMI2-NEXT: .LBB263_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_32_const_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB263_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: xorl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB263_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: je .LBB263_4 +; X64-NOBMI2-NEXT: # %bb.3: # %if.then +; X64-NOBMI2-NEXT: movl 16(%rdi), %ecx +; X64-NOBMI2-NEXT: .LBB263_4: # %return +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_32_const_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB263_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: xorl $16, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB263_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: je .LBB263_4 +; X64-BMI2-NEXT: # %bb.3: # %if.then +; X64-BMI2-NEXT: movl 16(%rdi), %ecx +; X64-BMI2-NEXT: .LBB263_4: # %return +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i32 16 monotonic, align 4 + %and = and i32 %0, 32 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_or_32_const_val(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_or_32_const_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btsl $4, (%ecx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: shll $4, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_32_const_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: lock btsl $4, (%ecx) +; X86-BMI2-NEXT: setb %al +; X86-BMI2-NEXT: 
shll $4, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_32_const_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsl $4, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: shll $4, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_32_const_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsl $4, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shll $4, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i32 16 monotonic, align 4 + %and = and i32 %0, 16 + ret i32 %and +} + +define i32 @atomic_shl1_neq_or_32_const_val(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_32_const_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB265_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: orl $16, %edx +; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NOBMI2-NEXT: jne .LBB265_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $32, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_32_const_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB265_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: orl $16, %edx +; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-BMI2-NEXT: jne .LBB265_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl $32, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_32_const_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB265_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB265_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andl $32, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_32_const_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB265_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: orl $16, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB265_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl $32, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i32 16 monotonic, align 4 + %and = and i32 %0, 32 + ret i32 %and +} + +define i32 @atomic_shl1_or_32_const_valz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_or_32_const_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB266_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: orl $16, %edx +; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NOBMI2-NEXT: jne .LBB266_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; 
X86-NOBMI2-NEXT: testb $16, %al +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_32_const_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB266_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: orl $16, %edx +; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-BMI2-NEXT: jne .LBB266_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $16, %al +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_32_const_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB266_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB266_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testb $16, %al +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_32_const_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB266_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: orl $16, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB266_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testb $16, %al +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i32 16 monotonic, align 4 + %and = lshr i32 %0, 4 + %and.lobit = and i32 %and, 1 + %lnot.ext = xor i32 %and.lobit, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_neq_or_32_const_valz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_32_const_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB267_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: orl $16, %edx +; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NOBMI2-NEXT: jne .LBB267_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_32_const_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB267_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: orl $16, %edx +; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-BMI2-NEXT: jne .LBB267_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; 
X64-NOBMI2-LABEL: atomic_shl1_neq_or_32_const_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB267_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB267_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_32_const_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB267_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: orl $16, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB267_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i32 16 monotonic, align 4 + %and = lshr i32 %0, 5 + %and.lobit = and i32 %and, 1 + %lnot.ext = xor i32 %and.lobit, 1 + ret i32 %lnot.ext +} + +define i32 @atomic_shl1_or_32_const_valnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_or_32_const_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB268_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: orl $16, %edx +; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NOBMI2-NEXT: jne .LBB268_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: shrl $4, %eax +; X86-NOBMI2-NEXT: andl $1, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_32_const_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB268_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: orl $16, %edx +; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-BMI2-NEXT: jne .LBB268_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrl $4, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_32_const_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB268_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB268_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: shrl $4, %eax +; X64-NOBMI2-NEXT: andl $1, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_32_const_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB268_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: orl $16, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, 
(%rdi) +; X64-BMI2-NEXT: jne .LBB268_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrl $4, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i32 16 monotonic, align 4 + %and = lshr i32 %0, 4 + %and.lobit = and i32 %and, 1 + ret i32 %and.lobit +} + +define i32 @atomic_shl1_neq_or_32_const_valnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_32_const_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%ecx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB269_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: orl $16, %edx +; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NOBMI2-NEXT: jne .LBB269_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: shrl $5, %eax +; X86-NOBMI2-NEXT: andl $1, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_32_const_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%ecx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB269_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: orl $16, %edx +; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-BMI2-NEXT: jne .LBB269_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrl $5, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_32_const_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB269_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB269_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: shrl $5, %eax +; X64-NOBMI2-NEXT: andl $1, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_32_const_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB269_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: orl $16, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB269_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrl $5, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i32 16 monotonic, align 4 + %and = lshr i32 %0, 5 + %and.lobit = and i32 %and, 1 + ret i32 %and.lobit +} + +define i32 @atomic_shl1_or_32_const_br(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_or_32_const_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: lock btsl $4, (%eax) +; X86-NOBMI2-NEXT: jae .LBB270_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl 16(%eax), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB270_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_32_const_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: lock btsl $4, (%eax) +; X86-BMI2-NEXT: jae .LBB270_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl 16(%eax), %eax +; X86-BMI2-NEXT: retl +; 
X86-BMI2-NEXT: .LBB270_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_32_const_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btsl $4, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB270_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl 16(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB270_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_32_const_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btsl $4, (%rdi) +; X64-BMI2-NEXT: jae .LBB270_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl 16(%rdi), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB270_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i32 16 monotonic, align 4 + %and = and i32 %0, 16 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_or_32_const_br(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_32_const_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB271_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: orl $16, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-NOBMI2-NEXT: jne .LBB271_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: je .LBB271_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movl 16(%edx), %ecx +; X86-NOBMI2-NEXT: .LBB271_4: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_32_const_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB271_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: orl $16, %ecx +; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-BMI2-NEXT: jne .LBB271_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB271_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl 16(%edx), %ecx +; X86-BMI2-NEXT: .LBB271_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_32_const_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB271_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: orl $16, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB271_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: je .LBB271_4 +; X64-NOBMI2-NEXT: # %bb.3: # %if.then +; X64-NOBMI2-NEXT: movl 16(%rdi), %ecx +; X64-NOBMI2-NEXT: .LBB271_4: # %return +; X64-NOBMI2-NEXT: movl %ecx, %eax 
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_or_32_const_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB271_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %ecx
+; X64-BMI2-NEXT: orl $16, %ecx
+; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-BMI2-NEXT: jne .LBB271_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movl $123, %ecx
+; X64-BMI2-NEXT: testb $32, %al
+; X64-BMI2-NEXT: je .LBB271_4
+; X64-BMI2-NEXT: # %bb.3: # %if.then
+; X64-BMI2-NEXT: movl 16(%rdi), %ecx
+; X64-BMI2-NEXT: .LBB271_4: # %return
+; X64-BMI2-NEXT: movl %ecx, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %0 = atomicrmw or ptr %v, i32 16 monotonic, align 4
+ %and = and i32 %0, 32
+ %tobool.not = icmp eq i32 %and, 0
+ br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i32, ptr %v, i64 4
+ %1 = load i32, ptr %arrayidx, align 4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @atomic_shl1_or_32_const_brz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_or_32_const_brz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: lock btsl $4, (%ecx)
+; X86-NOBMI2-NEXT: movl $123, %eax
+; X86-NOBMI2-NEXT: jae .LBB272_1
+; X86-NOBMI2-NEXT: # %bb.2: # %return
+; X86-NOBMI2-NEXT: retl
+; X86-NOBMI2-NEXT: .LBB272_1: # %if.then
+; X86-NOBMI2-NEXT: movl 16(%ecx), %eax
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_or_32_const_brz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: lock btsl $4, (%ecx)
+; X86-BMI2-NEXT: movl $123, %eax
+; X86-BMI2-NEXT: jae .LBB272_1
+; X86-BMI2-NEXT: # %bb.2: # %return
+; X86-BMI2-NEXT: retl
+; X86-BMI2-NEXT: .LBB272_1: # %if.then
+; X86-BMI2-NEXT: movl 16(%ecx), %eax
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_or_32_const_brz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: lock btsl $4, (%rdi)
+; X64-NOBMI2-NEXT: movl $123, %eax
+; X64-NOBMI2-NEXT: jae .LBB272_1
+; X64-NOBMI2-NEXT: # %bb.2: # %return
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB272_1: # %if.then
+; X64-NOBMI2-NEXT: movl 16(%rdi), %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_or_32_const_brz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: lock btsl $4, (%rdi)
+; X64-BMI2-NEXT: movl $123, %eax
+; X64-BMI2-NEXT: jae .LBB272_1
+; X64-BMI2-NEXT: # %bb.2: # %return
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB272_1: # %if.then
+; X64-BMI2-NEXT: movl 16(%rdi), %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %0 = atomicrmw or ptr %v, i32 16 monotonic, align 4
+ %and = and i32 %0, 16
+ %tobool.not = icmp eq i32 %and, 0
+ br i1 %tobool.not, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i32, ptr %v, i64 4
+ %1 = load i32, ptr %arrayidx, align 4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @atomic_shl1_neq_or_32_const_brz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_or_32_const_brz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movl (%edx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB273_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ecx
+; X86-NOBMI2-NEXT: orl $16, %ecx
+; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%edx)
+; X86-NOBMI2-NEXT: jne .LBB273_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movl $123, %ecx
+; X86-NOBMI2-NEXT: testb $32, %al
+; X86-NOBMI2-NEXT: jne .LBB273_4
+; X86-NOBMI2-NEXT: # %bb.3: # %if.then
+; X86-NOBMI2-NEXT: movl 16(%edx), %ecx
+; X86-NOBMI2-NEXT: .LBB273_4: # %return
+; X86-NOBMI2-NEXT: movl %ecx, %eax
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_or_32_const_brz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movl (%edx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB273_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %ecx
+; X86-BMI2-NEXT: orl $16, %ecx
+; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%edx)
+; X86-BMI2-NEXT: jne .LBB273_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movl $123, %ecx
+; X86-BMI2-NEXT: testb $32, %al
+; X86-BMI2-NEXT: jne .LBB273_4
+; X86-BMI2-NEXT: # %bb.3: # %if.then
+; X86-BMI2-NEXT: movl 16(%edx), %ecx
+; X86-BMI2-NEXT: .LBB273_4: # %return
+; X86-BMI2-NEXT: movl %ecx, %eax
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_or_32_const_brz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB273_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: orl $16, %ecx
+; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-NOBMI2-NEXT: jne .LBB273_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movl $123, %ecx
+; X64-NOBMI2-NEXT: testb $32, %al
+; X64-NOBMI2-NEXT: jne .LBB273_4
+; X64-NOBMI2-NEXT: # %bb.3: # %if.then
+; X64-NOBMI2-NEXT: movl 16(%rdi), %ecx
+; X64-NOBMI2-NEXT: .LBB273_4: # %return
+; X64-NOBMI2-NEXT: movl %ecx, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_or_32_const_brz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB273_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %ecx
+; X64-BMI2-NEXT: orl $16, %ecx
+; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-BMI2-NEXT: jne .LBB273_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movl $123, %ecx
+; X64-BMI2-NEXT: testb $32, %al
+; X64-BMI2-NEXT: jne .LBB273_4
+; X64-BMI2-NEXT: # %bb.3: # %if.then
+; X64-BMI2-NEXT: movl 16(%rdi), %ecx
+; X64-BMI2-NEXT: .LBB273_4: # %return
+; X64-BMI2-NEXT: movl %ecx, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %0 = atomicrmw or ptr %v, i32 16 monotonic, align 4
+ %and = and i32 %0, 32
+ %tobool.not = icmp eq i32 %and, 0
+ br i1 %tobool.not, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i32, ptr %v, i64 4
+ %1 = load i32, ptr %arrayidx, align 4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @atomic_shl1_or_32_const_brnz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_or_32_const_brnz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: lock btsl $4, (%eax)
+; X86-NOBMI2-NEXT: jae .LBB274_1
+; X86-NOBMI2-NEXT: # %bb.2: # %if.then
+; X86-NOBMI2-NEXT: movl 16(%eax), %eax
+; X86-NOBMI2-NEXT: retl
+; X86-NOBMI2-NEXT: .LBB274_1:
+; X86-NOBMI2-NEXT: movl $123, %eax
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_or_32_const_brnz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: lock btsl $4, (%eax)
+; X86-BMI2-NEXT: jae .LBB274_1
+; X86-BMI2-NEXT: # %bb.2: # %if.then
+; X86-BMI2-NEXT: movl 16(%eax), %eax
+; X86-BMI2-NEXT: retl
+; X86-BMI2-NEXT: .LBB274_1:
+; X86-BMI2-NEXT: movl $123, %eax
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_or_32_const_brnz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: lock btsl $4, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB274_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movl 16(%rdi), %eax
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB274_1:
+; X64-NOBMI2-NEXT: movl $123, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_or_32_const_brnz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: lock btsl $4, (%rdi)
+; X64-BMI2-NEXT: jae .LBB274_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
+; X64-BMI2-NEXT: movl 16(%rdi), %eax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB274_1:
+; X64-BMI2-NEXT: movl $123, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %0 = atomicrmw or ptr %v, i32 16 monotonic, align 4
+ %and = and i32 %0, 16
+ %tobool.not = icmp eq i32 %and, 0
+ br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i32, ptr %v, i64 4
+ %1 = load i32, ptr %arrayidx, align 4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @atomic_shl1_neq_or_32_const_brnz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_or_32_const_brnz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT: movl (%edx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB275_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ecx
+; X86-NOBMI2-NEXT: orl $16, %ecx
+; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%edx)
+; X86-NOBMI2-NEXT: jne .LBB275_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movl $123, %ecx
+; X86-NOBMI2-NEXT: testb $32, %al
+; X86-NOBMI2-NEXT: je .LBB275_4
+; X86-NOBMI2-NEXT: # %bb.3: # %if.then
+; X86-NOBMI2-NEXT: movl 16(%edx), %ecx
+; X86-NOBMI2-NEXT: .LBB275_4: # %return
+; X86-NOBMI2-NEXT: movl %ecx, %eax
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_or_32_const_brnz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movl (%edx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB275_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %ecx
+; X86-BMI2-NEXT: orl $16, %ecx
+; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%edx)
+; X86-BMI2-NEXT: jne .LBB275_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: movl $123, %ecx
+; X86-BMI2-NEXT: testb $32, %al
+; X86-BMI2-NEXT: je .LBB275_4
+; X86-BMI2-NEXT: # %bb.3: # %if.then
+; X86-BMI2-NEXT: movl 16(%edx), %ecx
+; X86-BMI2-NEXT: .LBB275_4: # %return
+; X86-BMI2-NEXT: movl %ecx, %eax
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_or_32_const_brnz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB275_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: orl $16, %ecx
+; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-NOBMI2-NEXT: jne .LBB275_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: movl $123, %ecx
+; X64-NOBMI2-NEXT: testb $32, %al
+; X64-NOBMI2-NEXT: je .LBB275_4
+; X64-NOBMI2-NEXT: # %bb.3: # %if.then
+; X64-NOBMI2-NEXT: movl 16(%rdi), %ecx
+; X64-NOBMI2-NEXT: .LBB275_4: # %return
+; X64-NOBMI2-NEXT: movl %ecx, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_or_32_const_brnz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB275_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %ecx
+; X64-BMI2-NEXT: orl $16, %ecx
+; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-BMI2-NEXT: jne .LBB275_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: movl $123, %ecx
+; X64-BMI2-NEXT: testb $32, %al
+; X64-BMI2-NEXT: je .LBB275_4
+; X64-BMI2-NEXT: # %bb.3: # %if.then
+; X64-BMI2-NEXT: movl 16(%rdi), %ecx
+; X64-BMI2-NEXT: .LBB275_4: # %return
+; X64-BMI2-NEXT: movl %ecx, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %0 = atomicrmw or ptr %v, i32 16 monotonic, align 4
+ %and = and i32 %0, 32
+ %tobool.not = icmp eq i32 %and, 0
+ br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i32, ptr %v, i64 4
+ %1 = load i32, ptr %arrayidx, align 4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @atomic_shl1_and_32_const_val(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_and_32_const_val:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: xorl %eax, %eax
+; X86-NOBMI2-NEXT: lock btrl $4, (%ecx)
+; X86-NOBMI2-NEXT: setb %al
+; X86-NOBMI2-NEXT: shll $4, %eax
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_and_32_const_val:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: xorl %eax, %eax
+; X86-BMI2-NEXT: lock btrl $4, (%ecx)
+; X86-BMI2-NEXT: setb %al
+; X86-BMI2-NEXT: shll $4, %eax
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_and_32_const_val:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: lock btrl $4, (%rdi)
+; X64-NOBMI2-NEXT: setb %al
+; X64-NOBMI2-NEXT: shll $4, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_and_32_const_val:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: lock btrl $4, (%rdi)
+; X64-BMI2-NEXT: setb %al
+; X64-BMI2-NEXT: shll $4, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4
+ %and = and i32 %0, 16
+ ret i32 %and
+}
+
+define i32 @atomic_shl1_neq_and_32_const_val(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_and_32_const_val:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl (%ecx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB277_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: andl $-17, %edx
+; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-NOBMI2-NEXT: jne .LBB277_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: andl $32, %eax
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_and_32_const_val:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl (%ecx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB277_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: andl $-17, %edx
+; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-BMI2-NEXT: jne .LBB277_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: andl $32, %eax
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_and_32_const_val:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB277_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: andl $-17, %ecx
+; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-NOBMI2-NEXT: jne .LBB277_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: andl $32, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_and_32_const_val:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB277_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %ecx
+; X64-BMI2-NEXT: andl $-17, %ecx
+; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-BMI2-NEXT: jne .LBB277_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: andl $32, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4
+ %and = and i32 %0, 32
+ ret i32 %and
+}
+
+define i32 @atomic_shl1_and_32_const_valz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_and_32_const_valz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl (%ecx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB278_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: andl $-17, %edx
+; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-NOBMI2-NEXT: jne .LBB278_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: xorl %ecx, %ecx
+; X86-NOBMI2-NEXT: testb $16, %al
+; X86-NOBMI2-NEXT: sete %cl
+; X86-NOBMI2-NEXT: movl %ecx, %eax
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_and_32_const_valz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl (%ecx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB278_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: andl $-17, %edx
+; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-BMI2-NEXT: jne .LBB278_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: xorl %ecx, %ecx
+; X86-BMI2-NEXT: testb $16, %al
+; X86-BMI2-NEXT: sete %cl
+; X86-BMI2-NEXT: movl %ecx, %eax
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_and_32_const_valz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB278_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: andl $-17, %ecx
+; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-NOBMI2-NEXT: jne .LBB278_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: xorl %ecx, %ecx
+; X64-NOBMI2-NEXT: testb $16, %al
+; X64-NOBMI2-NEXT: sete %cl
+; X64-NOBMI2-NEXT: movl %ecx, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_and_32_const_valz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB278_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %ecx
+; X64-BMI2-NEXT: andl $-17, %ecx
+; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-BMI2-NEXT: jne .LBB278_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: xorl %ecx, %ecx
+; X64-BMI2-NEXT: testb $16, %al
+; X64-BMI2-NEXT: sete %cl
+; X64-BMI2-NEXT: movl %ecx, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4
+ %and = lshr i32 %0, 4
+ %and.lobit = and i32 %and, 1
+ %lnot.ext = xor i32 %and.lobit, 1
+ ret i32 %lnot.ext
+}
+
+define i32 @atomic_shl1_neq_and_32_const_valz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_and_32_const_valz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl (%ecx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB279_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: andl $-17, %edx
+; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-NOBMI2-NEXT: jne .LBB279_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: xorl %ecx, %ecx
+; X86-NOBMI2-NEXT: testb $32, %al
+; X86-NOBMI2-NEXT: sete %cl
+; X86-NOBMI2-NEXT: movl %ecx, %eax
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_and_32_const_valz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl (%ecx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB279_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: andl $-17, %edx
+; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-BMI2-NEXT: jne .LBB279_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: xorl %ecx, %ecx
+; X86-BMI2-NEXT: testb $32, %al
+; X86-BMI2-NEXT: sete %cl
+; X86-BMI2-NEXT: movl %ecx, %eax
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_and_32_const_valz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB279_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: andl $-17, %ecx
+; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-NOBMI2-NEXT: jne .LBB279_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: xorl %ecx, %ecx
+; X64-NOBMI2-NEXT: testb $32, %al
+; X64-NOBMI2-NEXT: sete %cl
+; X64-NOBMI2-NEXT: movl %ecx, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_and_32_const_valz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB279_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %ecx
+; X64-BMI2-NEXT: andl $-17, %ecx
+; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-BMI2-NEXT: jne .LBB279_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: xorl %ecx, %ecx
+; X64-BMI2-NEXT: testb $32, %al
+; X64-BMI2-NEXT: sete %cl
+; X64-BMI2-NEXT: movl %ecx, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4
+ %and = lshr i32 %0, 5
+ %and.lobit = and i32 %and, 1
+ %lnot.ext = xor i32 %and.lobit, 1
+ ret i32 %lnot.ext
+}
+
+define i32 @atomic_shl1_and_32_const_valnz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_and_32_const_valnz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl (%ecx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB280_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: andl $-17, %edx
+; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-NOBMI2-NEXT: jne .LBB280_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: shrl $4, %eax
+; X86-NOBMI2-NEXT: andl $1, %eax
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_and_32_const_valnz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl (%ecx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB280_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: andl $-17, %edx
+; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-BMI2-NEXT: jne .LBB280_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: shrl $4, %eax
+; X86-BMI2-NEXT: andl $1, %eax
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_and_32_const_valnz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB280_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: andl $-17, %ecx
+; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-NOBMI2-NEXT: jne .LBB280_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: shrl $4, %eax
+; X64-NOBMI2-NEXT: andl $1, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_and_32_const_valnz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB280_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %ecx
+; X64-BMI2-NEXT: andl $-17, %ecx
+; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-BMI2-NEXT: jne .LBB280_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: shrl $4, %eax
+; X64-BMI2-NEXT: andl $1, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4
+ %and = lshr i32 %0, 4
+ %and.lobit = and i32 %and, 1
+ ret i32 %and.lobit
+}
+
+define i32 @atomic_shl1_neq_and_32_const_valnz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_and_32_const_valnz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl (%ecx), %eax
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB281_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: andl $-17, %edx
+; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-NOBMI2-NEXT: jne .LBB281_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: shrl $5, %eax
+; X86-NOBMI2-NEXT: andl $1, %eax
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_and_32_const_valnz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl (%ecx), %eax
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB281_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: andl $-17, %edx
+; X86-BMI2-NEXT: lock cmpxchgl %edx, (%ecx)
+; X86-BMI2-NEXT: jne .LBB281_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: shrl $5, %eax
+; X86-BMI2-NEXT: andl $1, %eax
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_and_32_const_valnz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl (%rdi), %eax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB281_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movl %eax, %ecx
+; X64-NOBMI2-NEXT: andl $-17, %ecx
+; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-NOBMI2-NEXT: jne .LBB281_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: shrl $5, %eax
+; X64-NOBMI2-NEXT: andl $1, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_and_32_const_valnz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl (%rdi), %eax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB281_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movl %eax, %ecx
+; X64-BMI2-NEXT: andl $-17, %ecx
+; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi)
+; X64-BMI2-NEXT: jne .LBB281_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: shrl $5, %eax
+; X64-BMI2-NEXT: andl $1, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4
+ %and = lshr i32 %0, 5
+ %and.lobit = and i32 %and, 1
+ ret i32 %and.lobit
+}
+
+define i32 @atomic_shl1_and_32_const_br(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_and_32_const_br:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: lock btrl $4, (%eax)
+; X86-NOBMI2-NEXT: jae .LBB282_1
+; X86-NOBMI2-NEXT: # %bb.2: # %if.then
+; X86-NOBMI2-NEXT: movl 16(%eax), %eax
+; X86-NOBMI2-NEXT: retl
+; X86-NOBMI2-NEXT: .LBB282_1:
+; X86-NOBMI2-NEXT: movl $123, %eax
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_and_32_const_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: lock btrl $4, (%eax)
+; X86-BMI2-NEXT: jae .LBB282_1
+; X86-BMI2-NEXT: # %bb.2: # %if.then
+; X86-BMI2-NEXT: movl 16(%eax), %eax
+; X86-BMI2-NEXT: retl
+; X86-BMI2-NEXT: .LBB282_1:
+; X86-BMI2-NEXT: movl $123, %eax
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_and_32_const_br:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: lock btrl $4, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB282_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movl 16(%rdi), %eax
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB282_1:
+; X64-NOBMI2-NEXT: movl $123, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_and_32_const_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: lock btrl $4, (%rdi)
+; X64-BMI2-NEXT: jae .LBB282_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
+; X64-BMI2-NEXT: movl 16(%rdi), %eax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB282_1:
+; X64-BMI2-NEXT: movl $123, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4
+ %and = and i32 %0, 16
+ %tobool.not = icmp eq i32 %and, 0
+ br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i32, ptr %v, i64 4
+ %1 = load i32, ptr %arrayidx, align 4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ]
+ ret i32 %retval.0
+}
+
+define i32 @atomic_shl1_neq_and_32_const_br(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_and_32_const_br:
+; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB283_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: andl $-17, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-NOBMI2-NEXT: jne .LBB283_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: je .LBB283_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movl 16(%edx), %ecx +; X86-NOBMI2-NEXT: .LBB283_4: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_32_const_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB283_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: andl $-17, %ecx +; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-BMI2-NEXT: jne .LBB283_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB283_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl 16(%edx), %ecx +; X86-BMI2-NEXT: .LBB283_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_32_const_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB283_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl $-17, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB283_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: je .LBB283_4 +; X64-NOBMI2-NEXT: # %bb.3: # %if.then +; X64-NOBMI2-NEXT: movl 16(%rdi), %ecx +; X64-NOBMI2-NEXT: .LBB283_4: # %return +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_32_const_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB283_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl $-17, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB283_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: je .LBB283_4 +; X64-BMI2-NEXT: # %bb.3: # %if.then +; X64-BMI2-NEXT: movl 16(%rdi), %ecx +; X64-BMI2-NEXT: .LBB283_4: # %return +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4 + %and = and i32 %0, 32 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_and_32_const_brz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_32_const_brz: +; X86-NOBMI2: # %bb.0: # 
%entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: lock btrl $4, (%ecx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB284_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB284_1: # %if.then +; X86-NOBMI2-NEXT: movl 16(%ecx), %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_32_const_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: lock btrl $4, (%ecx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB284_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB284_1: # %if.then +; X86-BMI2-NEXT: movl 16(%ecx), %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_32_const_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btrl $4, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB284_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB284_1: # %if.then +; X64-NOBMI2-NEXT: movl 16(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_32_const_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btrl $4, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB284_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB284_1: # %if.then +; X64-BMI2-NEXT: movl 16(%rdi), %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4 + %and = and i32 %0, 16 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_and_32_const_brz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_32_const_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB285_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: andl $-17, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-NOBMI2-NEXT: jne .LBB285_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: jne .LBB285_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movl 16(%edx), %ecx +; X86-NOBMI2-NEXT: .LBB285_4: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_32_const_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB285_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: andl $-17, %ecx +; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-BMI2-NEXT: jne .LBB285_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB285_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl 16(%edx), %ecx +; X86-BMI2-NEXT: .LBB285_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: 
atomic_shl1_neq_and_32_const_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB285_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl $-17, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB285_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: jne .LBB285_4 +; X64-NOBMI2-NEXT: # %bb.3: # %if.then +; X64-NOBMI2-NEXT: movl 16(%rdi), %ecx +; X64-NOBMI2-NEXT: .LBB285_4: # %return +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_32_const_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB285_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl $-17, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB285_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: jne .LBB285_4 +; X64-BMI2-NEXT: # %bb.3: # %if.then +; X64-BMI2-NEXT: movl 16(%rdi), %ecx +; X64-BMI2-NEXT: .LBB285_4: # %return +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4 + %and = and i32 %0, 32 + %tobool.not = icmp eq i32 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_and_32_const_brnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_32_const_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: lock btrl $4, (%eax) +; X86-NOBMI2-NEXT: jae .LBB286_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl 16(%eax), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB286_1: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_32_const_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: lock btrl $4, (%eax) +; X86-BMI2-NEXT: jae .LBB286_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl 16(%eax), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB286_1: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_32_const_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: lock btrl $4, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB286_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl 16(%rdi), %eax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB286_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_32_const_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: lock btrl $4, (%rdi) +; X64-BMI2-NEXT: jae .LBB286_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movl 16(%rdi), %eax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB286_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4 + %and = and i32 %0, 16 + %tobool.not = icmp eq i32 
%and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i32 @atomic_shl1_neq_and_32_const_brnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_32_const_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl (%edx), %eax +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB287_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ecx +; X86-NOBMI2-NEXT: andl $-17, %ecx +; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-NOBMI2-NEXT: jne .LBB287_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: je .LBB287_4 +; X86-NOBMI2-NEXT: # %bb.3: # %if.then +; X86-NOBMI2-NEXT: movl 16(%edx), %ecx +; X86-NOBMI2-NEXT: .LBB287_4: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_32_const_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movl (%edx), %eax +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB287_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ecx +; X86-BMI2-NEXT: andl $-17, %ecx +; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%edx) +; X86-BMI2-NEXT: jne .LBB287_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: je .LBB287_4 +; X86-BMI2-NEXT: # %bb.3: # %if.then +; X86-BMI2-NEXT: movl 16(%edx), %ecx +; X86-BMI2-NEXT: .LBB287_4: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_32_const_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl (%rdi), %eax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB287_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movl %eax, %ecx +; X64-NOBMI2-NEXT: andl $-17, %ecx +; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB287_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: je .LBB287_4 +; X64-NOBMI2-NEXT: # %bb.3: # %if.then +; X64-NOBMI2-NEXT: movl 16(%rdi), %ecx +; X64-NOBMI2-NEXT: .LBB287_4: # %return +; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_32_const_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl (%rdi), %eax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB287_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movl %eax, %ecx +; X64-BMI2-NEXT: andl $-17, %ecx +; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) +; X64-BMI2-NEXT: jne .LBB287_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: je .LBB287_4 +; X64-BMI2-NEXT: # %bb.3: # %if.then +; X64-BMI2-NEXT: movl 16(%rdi), %ecx +; X64-BMI2-NEXT: .LBB287_4: # %return +; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i32 -17 monotonic, align 4 + %and = and i32 %0, 32 + %tobool.not = icmp eq i32 %and, 0 + br 
i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i32, ptr %v, i64 4 + %1 = load i32, ptr %arrayidx, align 4 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %1, %if.then ], [ 123, %entry ] + ret i32 %retval.0 +} + +define i64 @atomic_shl1_xor_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB288_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB288_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB288_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB288_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB288_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB288_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB288_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; 
X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB288_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl2_xor_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_xor_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB289_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB289_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB289_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB289_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_xor_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: 
.cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB289_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB289_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB289_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB289_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_xor_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB289_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: xorq %rdx, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB289_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_xor_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB289_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB289_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl1_neq_xor_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; 
X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB290_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB290_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB290_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB290_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: shldl %cl, %esi, %edi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB290_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB290_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %edx +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB290_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB290_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB290_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB290_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movl $1, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %edi, %esi +; X86-BMI2-NEXT: shlxl %ecx, %edi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB290_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB290_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %edx +; 
X86-BMI2-NEXT: andl %edi, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB290_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB290_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: andq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB290_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB290_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %sil +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxq %rsi, %rcx, %rcx +; X64-BMI2-NEXT: andq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_small_mask_xor_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB291_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB291_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; 
X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $1, %ecx +; X86-BMI2-NEXT: shlxl %eax, %ecx, %edi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB291_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %edi, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB291_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %edi, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl1_mask0_xor_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB292_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB292_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB292_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB292_3 +; 
X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %esi, %edi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB292_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB292_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %edx +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB292_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB292_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB292_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB292_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %edi, %esi +; X86-BMI2-NEXT: shlxl %ecx, %edi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB292_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB292_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: andl %edi, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: 
xorl %eax, %eax +; X64-BMI2-NEXT: lock btcq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_mask1_xor_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB293_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB293_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB293_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB293_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %esi, %edi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB293_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB293_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %edx +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB293_2 +; X86-BMI2-NEXT: # %bb.1: +; 
X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB293_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB293_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB293_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %edi, %esi +; X86-BMI2-NEXT: shlxl %ecx, %edi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB293_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB293_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: andl %edi, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_mask01_xor_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB294_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB294_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB294_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, 
%ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB294_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB294_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB294_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB294_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB294_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_blsi_xor_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_xor_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: movl %eax, %ebp +; X86-NOBMI2-NEXT: negl %ebp +; X86-NOBMI2-NEXT: sbbl %ecx, %esi +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB295_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %ebp, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %esi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB295_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %eax +; X86-NOBMI2-NEXT: andl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_xor_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl %eax, %ebp +; X86-BMI2-NEXT: negl %ebp +; X86-BMI2-NEXT: sbbl %ecx, %esi +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB295_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB295_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB295_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; 
X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB295_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw xor ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + ret i64 %and3 +} + +define i64 @atomic_shl1_xor_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB296_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB296_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB296_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB296_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB296_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB296_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB296_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: 
movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB296_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB296_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB296_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB296_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB296_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB296_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB296_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB296_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB296_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btq %rsi, %rax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %c + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl2_xor_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_xor_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edi 
+; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB297_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB297_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB297_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB297_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_xor_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB297_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB297_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB297_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB297_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_xor_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq 
%cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB297_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: xorq %rdx, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB297_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testq %rdx, %rax +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_xor_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB297_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: xorq %rdx, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB297_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + %conv = zext i1 %tobool.not to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_neq_xor_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB298_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB298_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB298_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB298_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB298_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB298_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB298_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB298_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB298_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB298_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB298_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB298_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB298_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB298_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movzbl %cl, %edx +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btq %rdx, %rax +; X64-NOBMI2-NEXT: setae %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB298_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq 
%rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB298_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %sil +; X64-BMI2-NEXT: movzbl %sil, %edx +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btq %rdx, %rax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %add + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_small_mask_xor_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: movl %ecx, %ebp +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB299_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB299_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: btl %ebp, %eax +; X86-NOBMI2-NEXT: setae %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: andl $31, %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %esi, %eax, %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB299_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne 
.LBB299_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %esi, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB299_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB299_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB299_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB299_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %rem + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask0_xor_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB300_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB300_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB300_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; 
X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB300_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB300_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB300_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB300_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB300_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB300_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB300_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB300_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB300_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB300_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi 
+; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB300_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB300_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB300_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btq %rsi, %rax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %c + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask1_xor_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB301_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB301_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB301_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB301_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB301_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB301_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; 
X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB301_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB301_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB301_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB301_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB301_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB301_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB301_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB301_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB301_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB301_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btq %rsi, %rax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + 
%shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %rem + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask01_xor_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: pushl %eax +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $63, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB302_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB302_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB302_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB302_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB302_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB302_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: addl $4, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $63, %ecx +; X86-BMI2-NEXT: movl $1, 
%eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: je .LBB302_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB302_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB302_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB302_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB302_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB302_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB302_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB302_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB302_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB302_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btq %rsi, %rax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %rem + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_blsi_xor_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_xor_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: movl %eax, %ebp +; X86-NOBMI2-NEXT: negl %ebp +; X86-NOBMI2-NEXT: sbbl %ecx, %esi +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB303_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %ebp, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %esi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB303_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %eax +; X86-NOBMI2-NEXT: andl %esi, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_xor_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl %eax, %ebp +; X86-BMI2-NEXT: negl %ebp +; X86-BMI2-NEXT: sbbl %ecx, %esi +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB303_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB303_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: 
.cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB303_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: xorq %rdx, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB303_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw xor ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + %tobool.not = icmp eq i64 %and3, 0 + %conv = zext i1 %tobool.not to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_xor_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB304_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB304_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB304_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB304_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB304_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB304_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: 
.cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB304_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB304_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB304_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB304_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB304_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB304_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB304_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB304_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB304_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB304_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = lshr i64 %0, %c + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl2_xor_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_xor_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB305_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB305_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB305_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB305_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: setne %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_xor_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB305_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB305_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB305_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB305_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: setne %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: 
.cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_xor_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB305_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: xorq %rdx, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB305_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testq %rdx, %rax +; X64-NOBMI2-NEXT: setne %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_xor_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB305_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: xorq %rdx, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB305_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: setne %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool = icmp ne i64 %and, 0 + %conv = zext i1 %tobool to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_neq_xor_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB306_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB306_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB306_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB306_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx 
+; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB306_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB306_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB306_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB306_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB306_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB306_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB306_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB306_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB306_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB306_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movzbl %cl, %edx +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btq %rdx, %rax +; X64-NOBMI2-NEXT: setb %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: 
atomic_shl1_neq_xor_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB306_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB306_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %sil +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %1 = lshr i64 %0, %add + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_small_mask_xor_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: movl %ecx, %ebp +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB307_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB307_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: btl %ebp, %eax +; X86-NOBMI2-NEXT: setb %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: andl $31, %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %esi, %eax, %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB307_1: # %atomicrmw.start 
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB307_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %esi, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB307_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB307_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB307_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB307_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask0_xor_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB308_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB308_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB308_3: # %atomicrmw.start +; 
X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB308_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB308_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB308_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB308_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB308_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB308_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB308_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB308_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB308_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB308_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: 
xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB308_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB308_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB308_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = lshr i64 %0, %c + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask1_xor_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB309_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB309_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB309_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB309_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB309_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB309_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: 
.cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB309_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB309_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB309_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB309_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB309_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB309_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB309_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB309_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB309_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB309_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask01_xor_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: 
atomic_shl1_mask01_xor_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: pushl %eax +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $63, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB310_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB310_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB310_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB310_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB310_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB310_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: addl $4, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $63, %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: je .LBB310_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; 
X86-BMI2-NEXT: .LBB310_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB310_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB310_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB310_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB310_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB310_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB310_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB310_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB310_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_blsi_xor_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_xor_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: movl %eax, %ebp +; X86-NOBMI2-NEXT: negl %ebp +; X86-NOBMI2-NEXT: sbbl %ecx, %esi +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB311_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %ebp, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %esi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB311_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %eax +; X86-NOBMI2-NEXT: andl %esi, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: setne %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_xor_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl %eax, %ebp +; X86-BMI2-NEXT: negl %ebp +; X86-BMI2-NEXT: sbbl %ecx, %esi +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB311_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB311_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: setne %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB311_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: xorq %rdx, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne 
.LBB311_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: setne %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw xor ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + %tobool = icmp ne i64 %and3, 0 + %conv = zext i1 %tobool to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_xor_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB312_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB312_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB312_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB312_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB312_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB312_7 +; X86-NOBMI2-NEXT: .LBB312_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB312_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; 
X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB312_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB312_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB312_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB312_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB312_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB312_7 +; X86-BMI2-NEXT: .LBB312_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB312_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btcq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB312_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB312_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btcq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB312_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB312_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl2_xor_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_xor_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; 
X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB313_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB313_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB313_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB313_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB313_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB313_7 +; X86-NOBMI2-NEXT: .LBB313_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB313_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_xor_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB313_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB313_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB313_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB313_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB313_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB313_7 +; X86-BMI2-NEXT: .LBB313_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB313_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: 
.cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_xor_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB313_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB313_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: testq %rdx, %rax +; X64-NOBMI2-NEXT: je .LBB313_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB313_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_xor_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB313_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB313_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testq %rcx, %rax +; X64-BMI2-NEXT: je .LBB313_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB313_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_xor_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB314_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB314_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB314_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; 
X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB314_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB314_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB314_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB314_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB314_9 +; X86-NOBMI2-NEXT: .LBB314_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB314_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB314_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB314_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB314_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB314_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB314_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB314_6: # 
%atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB314_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-BMI2-NEXT: jmp .LBB314_9 +; X86-BMI2-NEXT: .LBB314_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB314_9: # %return +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB314_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB314_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %edx +; X64-NOBMI2-NEXT: btq %rdx, %rax +; X64-NOBMI2-NEXT: jae .LBB314_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB314_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB314_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB314_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rsi), %ecx +; X64-BMI2-NEXT: movzbl %cl, %ecx +; X64-BMI2-NEXT: btq %rcx, %rax +; X64-BMI2-NEXT: jae .LBB314_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB314_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_small_mask_xor_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: 
.cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: movl %ecx, %ebp +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB315_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB315_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %edi, %eax +; X86-NOBMI2-NEXT: je .LBB315_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%esi,%ebp,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ebp,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB315_5 +; X86-NOBMI2-NEXT: .LBB315_3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB315_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: andl $31, %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edi, %eax, %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB315_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB315_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %ebp, %eax +; X86-BMI2-NEXT: je .LBB315_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edi,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%edi,8), %edx +; X86-BMI2-NEXT: jmp .LBB315_5 +; X86-BMI2-NEXT: .LBB315_3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB315_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $31, %esi +; X64-NOBMI2-NEXT: lock 
btcq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB315_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB315_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: lock btcq %rsi, (%rdi) +; X64-BMI2-NEXT: jae .LBB315_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB315_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %rem + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask0_xor_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB316_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB316_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB316_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB316_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB316_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB316_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB316_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB316_9 +; X86-NOBMI2-NEXT: .LBB316_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB316_9: # %return 
+; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB316_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB316_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB316_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB316_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB316_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB316_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB316_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB316_9 +; X86-BMI2-NEXT: .LBB316_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB316_9: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btcq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB316_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB316_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btcq %rax, (%rdi) +; X64-BMI2-NEXT: jae 
.LBB316_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB316_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask1_xor_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB317_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB317_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB317_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB317_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB317_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB317_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB317_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB317_9 +; X86-NOBMI2-NEXT: .LBB317_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB317_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_br: +; X86-BMI2: 
# %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB317_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB317_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB317_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB317_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB317_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB317_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB317_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB317_9 +; X86-BMI2-NEXT: .LBB317_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB317_9: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btcq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB317_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB317_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btcq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB317_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB317_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 
%tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask01_xor_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB318_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB318_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB318_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB318_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB318_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB318_7 +; X86-NOBMI2-NEXT: .LBB318_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB318_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB318_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, 
%edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB318_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB318_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB318_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB318_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB318_7 +; X86-BMI2-NEXT: .LBB318_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB318_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btcq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB318_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB318_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btcq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB318_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB318_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_blsi_xor_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_xor_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl %ecx, %esi +; X86-NOBMI2-NEXT: negl %esi +; X86-NOBMI2-NEXT: sbbl %eax, %ebp +; 
X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB319_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB319_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB319_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB319_5 +; X86-NOBMI2-NEXT: .LBB319_3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB319_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_xor_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: negl %esi +; X86-BMI2-NEXT: sbbl %eax, %ebp +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB319_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %esi, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %ebp, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB319_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %ebp, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB319_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB319_5 +; X86-BMI2-NEXT: .LBB319_3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB319_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq 
%rsi, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB319_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB319_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testq %rcx, %rax +; X64-BMI2-NEXT: je .LBB319_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB319_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw xor ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + %tobool.not = icmp eq i64 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_xor_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB320_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB320_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB320_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB320_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB320_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB320_7 +; X86-NOBMI2-NEXT: .LBB320_6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: .LBB320_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry 
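+; NOTE (added annotation): 32-bit targets have no 64-bit lock btc, so the i64 atomicrmw xor is expanded to a lock cmpxchg8b retry loop here; only the X64 runs fold this single-bit pattern into a lock btcq.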
+; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB320_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB320_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB320_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB320_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB320_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB320_7 +; X86-BMI2-NEXT: .LBB320_6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: .LBB320_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btcq %rax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB320_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB320_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btcq %rax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB320_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB320_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl2_xor_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_xor_64_gpr_brz: +; 
X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB321_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB321_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB321_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB321_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB321_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB321_7 +; X86-NOBMI2-NEXT: .LBB321_6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: .LBB321_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_xor_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB321_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB321_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB321_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; 
X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB321_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB321_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB321_7 +; X86-BMI2-NEXT: .LBB321_6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: .LBB321_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_xor_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %esi +; X64-NOBMI2-NEXT: shlq %cl, %rsi +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB321_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rdx +; X64-NOBMI2-NEXT: xorq %rsi, %rdx +; X64-NOBMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB321_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %edx +; X64-NOBMI2-NEXT: testq %rsi, %rax +; X64-NOBMI2-NEXT: je .LBB321_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB321_3: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rdx +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_xor_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB321_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: xorq %rdx, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB321_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: je .LBB321_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB321_3: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rcx +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_xor_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB322_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB322_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB322_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB322_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB322_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB322_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: jne .LBB322_8 +; X86-NOBMI2-NEXT: # %bb.7: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-NOBMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-NOBMI2-NEXT: .LBB322_8: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB322_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB322_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB322_3: # 
%atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB322_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB322_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB322_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: jne .LBB322_8 +; X86-BMI2-NEXT: # %bb.7: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-BMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-BMI2-NEXT: .LBB322_8: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB322_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB322_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %esi +; X64-NOBMI2-NEXT: movl $123, %edx +; X64-NOBMI2-NEXT: btq %rsi, %rax +; X64-NOBMI2-NEXT: jae .LBB322_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB322_3: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rdx +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB322_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB322_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rsi), %ecx +; X64-BMI2-NEXT: movzbl %cl, %edx +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: btq %rdx, %rax +; X64-BMI2-NEXT: jae .LBB322_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB322_3: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rcx +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + 
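; NOTE (added annotation): the toggled bit (1 << %c) differs from the tested bit (1 << (%c + 1)), so the CF result of a single lock btc cannot be reused; the X64 runs above keep a cmpxchg loop followed by a separate btq. +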
%0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_small_mask_xor_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: movl %ecx, %ebp +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB323_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB323_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %edi, %eax +; X86-NOBMI2-NEXT: je .LBB323_4 +; X86-NOBMI2-NEXT: # %bb.3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB323_5 +; X86-NOBMI2-NEXT: .LBB323_4: # %if.then +; X86-NOBMI2-NEXT: movl (%esi,%ebp,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ebp,8), %edx +; X86-NOBMI2-NEXT: .LBB323_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: andl $31, %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edi, %eax, %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB323_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx 
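+; NOTE (added annotation): the bit index is masked with `and 31`, so only the low dword can change; the high half is carried through the cmpxchg8b loop unmodified (EDX is copied to ECX without an xor).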
+; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB323_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %ebp, %eax +; X86-BMI2-NEXT: je .LBB323_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB323_5 +; X86-BMI2-NEXT: .LBB323_4: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edi,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%edi,8), %edx +; X86-BMI2-NEXT: .LBB323_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $31, %esi +; X64-NOBMI2-NEXT: lock btcq %rsi, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB323_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB323_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: lock btcq %rsi, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB323_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB323_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %rem + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask0_xor_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB324_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB324_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB324_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx 
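+; NOTE (added annotation): lock cmpxchg8b compares EDX:EAX against the 8-byte memory operand and, on a match, stores ECX:EBX; the jne below retries until the exchange succeeds.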
+; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB324_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB324_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB324_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: jne .LBB324_8 +; X86-NOBMI2-NEXT: # %bb.7: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-NOBMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-NOBMI2-NEXT: .LBB324_8: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB324_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB324_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB324_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB324_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB324_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB324_6: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: jne .LBB324_8 +; X86-BMI2-NEXT: # %bb.7: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-BMI2-NEXT: movl 4(%edi,%eax,8), %esi +; 
X86-BMI2-NEXT: .LBB324_8: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btcq %rax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB324_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB324_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btcq %rax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB324_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB324_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask1_xor_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB325_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB325_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB325_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB325_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; 
X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB325_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB325_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: jne .LBB325_8 +; X86-NOBMI2-NEXT: # %bb.7: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-NOBMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-NOBMI2-NEXT: .LBB325_8: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB325_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB325_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB325_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB325_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB325_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB325_6: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: jne .LBB325_8 +; X86-BMI2-NEXT: # %bb.7: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-BMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-BMI2-NEXT: .LBB325_8: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; 
X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btcq %rax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB325_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB325_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btcq %rax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB325_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB325_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask01_xor_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB326_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB326_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB326_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB326_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB326_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB326_7 +; X86-NOBMI2-NEXT: .LBB326_6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: .LBB326_7: # %return +; 
X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB326_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB326_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB326_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB326_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB326_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB326_7 +; X86-BMI2-NEXT: .LBB326_6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: .LBB326_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btcq %rax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB326_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB326_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btcq %rax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB326_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB326_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq 
i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_blsi_xor_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_xor_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl %ecx, %esi +; X86-NOBMI2-NEXT: negl %esi +; X86-NOBMI2-NEXT: sbbl %eax, %ebp +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB327_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB327_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB327_4 +; X86-NOBMI2-NEXT: # %bb.3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB327_5 +; X86-NOBMI2-NEXT: .LBB327_4: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-NOBMI2-NEXT: .LBB327_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_xor_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: negl %esi +; X86-BMI2-NEXT: sbbl %eax, %ebp +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; 
X86-BMI2-NEXT: .LBB327_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %esi, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %ebp, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB327_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %ebp, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB327_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB327_5 +; X86-BMI2-NEXT: .LBB327_4: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: .LBB327_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB327_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: xorq %rdx, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB327_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: je .LBB327_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB327_3: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rcx +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw xor ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + %tobool.not = icmp eq i64 %and3, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_xor_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB328_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB328_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), 
%edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB328_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB328_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB328_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB328_7 +; X86-NOBMI2-NEXT: .LBB328_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB328_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB328_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB328_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB328_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB328_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB328_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB328_7 +; X86-BMI2-NEXT: .LBB328_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB328_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: 
andl $63, %eax +; X64-NOBMI2-NEXT: lock btcq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB328_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB328_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btcq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB328_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB328_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl2_xor_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_xor_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB329_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB329_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB329_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB329_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB329_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB329_7 +; X86-NOBMI2-NEXT: .LBB329_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB329_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: 
atomic_shl2_xor_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB329_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB329_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB329_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB329_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB329_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB329_7 +; X86-BMI2-NEXT: .LBB329_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB329_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_xor_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB329_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB329_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: testq %rdx, %rax +; X64-NOBMI2-NEXT: je .LBB329_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB329_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_xor_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB329_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB329_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testq %rcx, 
%rax +; X64-BMI2-NEXT: je .LBB329_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB329_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_xor_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB330_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB330_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB330_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB330_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB330_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB330_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB330_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB330_9 +; X86-NOBMI2-NEXT: .LBB330_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB330_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB330_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB330_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB330_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB330_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB330_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB330_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB330_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-BMI2-NEXT: jmp .LBB330_9 +; X86-BMI2-NEXT: .LBB330_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB330_9: # %return +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB330_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: xorq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB330_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %edx +; X64-NOBMI2-NEXT: btq %rdx, %rax +; X64-NOBMI2-NEXT: jae .LBB330_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movq 
(%rdi,%rcx,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB330_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB330_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB330_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rsi), %ecx +; X64-BMI2-NEXT: movzbl %cl, %ecx +; X64-BMI2-NEXT: btq %rcx, %rax +; X64-BMI2-NEXT: jae .LBB330_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB330_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_small_mask_xor_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: movl %ecx, %ebp +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB331_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB331_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %edi, %eax +; X86-NOBMI2-NEXT: je .LBB331_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%esi,%ebp,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ebp,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB331_5 +; X86-NOBMI2-NEXT: .LBB331_3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB331_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; 
X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: andl $31, %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edi, %eax, %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB331_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB331_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %ebp, %eax +; X86-BMI2-NEXT: je .LBB331_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edi,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%edi,8), %edx +; X86-BMI2-NEXT: jmp .LBB331_5 +; X86-BMI2-NEXT: .LBB331_3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB331_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $31, %esi +; X64-NOBMI2-NEXT: lock btcq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB331_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB331_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: lock btcq %rsi, (%rdi) +; X64-BMI2-NEXT: jae .LBB331_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB331_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %rem + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask0_xor_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB332_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB332_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB332_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB332_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB332_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB332_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB332_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB332_9 +; X86-NOBMI2-NEXT: .LBB332_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB332_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB332_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB332_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB332_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl 
%eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB332_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB332_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB332_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB332_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB332_9 +; X86-BMI2-NEXT: .LBB332_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB332_9: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btcq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB332_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB332_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btcq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB332_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB332_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask1_xor_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, 
%ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB333_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB333_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB333_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB333_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB333_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB333_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB333_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB333_9 +; X86-NOBMI2-NEXT: .LBB333_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB333_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB333_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB333_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB333_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB333_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, 
%ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB333_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB333_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB333_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB333_9 +; X86-BMI2-NEXT: .LBB333_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB333_9: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btcq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB333_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB333_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btcq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB333_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB333_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask01_xor_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB334_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB334_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB334_3: # 
%atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB334_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB334_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB334_7 +; X86-NOBMI2-NEXT: .LBB334_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB334_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB334_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB334_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB334_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB334_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB334_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB334_7 +; X86-BMI2-NEXT: .LBB334_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB334_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btcq %rax, (%rdi) +; 
X64-NOBMI2-NEXT: jae .LBB334_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB334_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btcq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB334_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB334_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw xor ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_blsi_xor_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_xor_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl %ecx, %esi +; X86-NOBMI2-NEXT: negl %esi +; X86-NOBMI2-NEXT: sbbl %eax, %ebp +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB335_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: xorl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB335_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB335_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB335_5 +; X86-NOBMI2-NEXT: .LBB335_3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB335_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_xor_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 
+; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: negl %esi +; X86-BMI2-NEXT: sbbl %eax, %ebp +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB335_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl %esi, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: xorl %ebp, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB335_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %ebp, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB335_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB335_5 +; X86-BMI2-NEXT: .LBB335_3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB335_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_xor_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB335_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: xorq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB335_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testq %rcx, %rax +; X64-BMI2-NEXT: je .LBB335_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB335_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw xor ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + %tobool.not = icmp eq i64 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_or_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi 
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB336_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB336_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB336_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB336_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB336_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB336_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB336_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB336_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsq %rcx, (%rdi) +; 
X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl2_or_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_or_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB337_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB337_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB337_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB337_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_or_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB337_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB337_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; 
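NOTE: 64-bit atomics on the 32-bit targets expand to a lock cmpxchg8b retry loop; the bit-test forms in these tests apply only to x86-64. +;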
X86-BMI2-NEXT: .LBB337_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB337_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_or_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB337_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: orq %rdx, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB337_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_or_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB337_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB337_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl1_neq_or_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB338_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB338_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB338_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock 
cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB338_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: shldl %cl, %esi, %edi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB338_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB338_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %edx +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB338_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB338_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB338_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB338_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movl $1, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %edi, %esi +; X86-BMI2-NEXT: shlxl %ecx, %edi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB338_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB338_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: andl %edi, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB338_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; 
X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB338_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: andq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB338_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB338_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %sil +; X64-BMI2-NEXT: movl $1, %ecx +; X64-BMI2-NEXT: shlxq %rsi, %rcx, %rcx +; X64-BMI2-NEXT: andq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_small_mask_or_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB339_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB339_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: .cfi_offset %esi, -16 +; X86-BMI2-NEXT: .cfi_offset %edi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $1, %ecx +; X86-BMI2-NEXT: shlxl %eax, %ecx, %edi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB339_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %edi, %ebx +; 
X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB339_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %edi, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl1_mask0_or_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB340_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB340_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB340_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB340_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %esi, %edi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB340_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB340_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %edx +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; 
X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB340_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB340_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB340_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB340_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %edi, %esi +; X86-BMI2-NEXT: shlxl %ecx, %edi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB340_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB340_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: andl %edi, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_mask1_or_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB341_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB341_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB341_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB341_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %esi, %edi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB341_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB341_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %edx +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB341_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB341_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB341_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB341_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %edi, %esi +; X86-BMI2-NEXT: shlxl %ecx, %edi, %edi 
+; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB341_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB341_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: andl %edi, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_mask01_or_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB342_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB342_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB342_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB342_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl 
%ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB342_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB342_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB342_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB342_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_blsi_or_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_or_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: movl %eax, %ebp +; X86-NOBMI2-NEXT: negl %ebp +; X86-NOBMI2-NEXT: sbbl %ecx, %esi +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB343_1: # 
%atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %ebp, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %esi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB343_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %eax +; X86-NOBMI2-NEXT: andl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_or_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl %eax, %ebp +; X86-BMI2-NEXT: negl %ebp +; X86-BMI2-NEXT: sbbl %ecx, %esi +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB343_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB343_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB343_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB343_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw or ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + ret i64 %and3 +} + +define i64 @atomic_shl1_or_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: 
.cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB344_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB344_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB344_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB344_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB344_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB344_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB344_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB344_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB344_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB344_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB344_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: 
shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB344_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB344_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB344_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB344_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB344_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btq %rsi, %rax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %c + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl2_or_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_or_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB345_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB345_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB345_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB345_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; 
X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_or_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB345_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB345_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB345_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB345_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_or_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB345_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: orq %rdx, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB345_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testq %rdx, %rax +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_or_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB345_1: # 
%atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: orq %rdx, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB345_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + %conv = zext i1 %tobool.not to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_neq_or_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB346_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB346_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB346_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB346_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB346_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB346_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB346_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB346_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB346_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB346_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB346_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB346_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB346_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB346_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movzbl %cl, %edx +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btq %rdx, %rax +; X64-NOBMI2-NEXT: setae %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB346_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB346_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %sil +; X64-BMI2-NEXT: movzbl %sil, %edx +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btq %rdx, %rax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %add + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_small_mask_or_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: movl %ecx, %ebp +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB347_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB347_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: btl %ebp, %eax +; X86-NOBMI2-NEXT: setae %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: andl $31, %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %esi, %eax, %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB347_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB347_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: btl %esi, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: 
.p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB347_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB347_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB347_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB347_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btl %esi, %eax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %rem + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask0_or_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB348_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB348_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB348_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB348_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB348_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB348_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB348_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB348_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB348_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB348_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB348_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB348_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB348_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB348_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB348_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB348_1 +; X64-BMI2-NEXT: # %bb.2: 
# %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btq %rsi, %rax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %c + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask1_or_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB349_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB349_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB349_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB349_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB349_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB349_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je 
.LBB349_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB349_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB349_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB349_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB349_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB349_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB349_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB349_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB349_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB349_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btq %rsi, %rax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %rem + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask01_or_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: pushl %eax +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; 
X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $63, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB350_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB350_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB350_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB350_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB350_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB350_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: addl $4, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $63, %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: je .LBB350_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB350_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB350_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB350_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax 
+; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB350_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB350_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB350_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB350_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB350_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB350_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btq %rsi, %rax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %rem + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_blsi_or_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_or_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: movl %eax, %ebp +; X86-NOBMI2-NEXT: negl %ebp +; X86-NOBMI2-NEXT: sbbl %ecx, %esi +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB351_1: # %atomicrmw.start +; 
X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %ebp, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %esi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB351_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %eax +; X86-NOBMI2-NEXT: andl %esi, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_or_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl %eax, %ebp +; X86-BMI2-NEXT: negl %ebp +; X86-BMI2-NEXT: sbbl %ecx, %esi +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB351_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB351_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB351_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: orq %rdx, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB351_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw or ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + %tobool.not = icmp eq i64 %and3, 0 + %conv = zext i1 %tobool.not 
to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_or_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB352_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB352_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB352_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB352_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB352_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB352_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB352_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB352_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB352_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; 
X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB352_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB352_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB352_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB352_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB352_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB352_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB352_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %1 = lshr i64 %0, %c + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl2_or_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_or_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB353_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB353_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB353_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # 
=>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB353_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: setne %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_or_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB353_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB353_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB353_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB353_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: setne %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_or_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB353_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: orq %rdx, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB353_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testq %rdx, %rax +; X64-NOBMI2-NEXT: setne %cl +; 
X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_or_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB353_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: orq %rdx, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB353_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: setne %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool = icmp ne i64 %and, 0 + %conv = zext i1 %tobool to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_neq_or_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB354_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB354_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB354_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB354_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB354_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB354_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; 
X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB354_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB354_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB354_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB354_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB354_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB354_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB354_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB354_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movzbl %cl, %edx +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btq %rdx, %rax +; X64-NOBMI2-NEXT: setb %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB354_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB354_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %sil +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %1 = lshr i64 %0, %add + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_small_mask_or_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_valnz: +; X86-NOBMI2: # 
%bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: movl %ecx, %ebp +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB355_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB355_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: btl %ebp, %eax +; X86-NOBMI2-NEXT: setb %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: andl $31, %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %esi, %eax, %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB355_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB355_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrxl %esi, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; 
X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB355_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB355_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB355_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB355_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask0_or_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB356_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB356_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB356_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB356_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB356_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB356_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; 
X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB356_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB356_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB356_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB356_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB356_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB356_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB356_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB356_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB356_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB356_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, 
%rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %1 = lshr i64 %0, %c + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask1_or_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB357_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB357_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB357_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB357_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB357_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB357_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB357_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB357_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB357_3: # %atomicrmw.start +; 
X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB357_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB357_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB357_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB357_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB357_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB357_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB357_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask01_or_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: pushl %eax +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $63, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je 
.LBB358_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB358_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB358_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB358_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB358_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB358_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: addl $4, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $63, %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: je .LBB358_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB358_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB358_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB358_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB358_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB358_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; 
X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB358_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB358_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB358_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB358_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_blsi_or_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_or_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: movl %eax, %ebp +; X86-NOBMI2-NEXT: negl %ebp +; X86-NOBMI2-NEXT: sbbl %ecx, %esi +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB359_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %ebp, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %esi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB359_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %eax +; X86-NOBMI2-NEXT: andl %esi, %edx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: setne %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_or_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl %eax, %ebp +; X86-BMI2-NEXT: negl %ebp +; X86-BMI2-NEXT: sbbl %ecx, %esi +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB359_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB359_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: setne %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB359_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: orq %rdx, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB359_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: setne %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw or ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + %tobool = icmp ne i64 %and3, 0 + %conv = zext i1 %tobool to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_or_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: 
.cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB360_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB360_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB360_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB360_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB360_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB360_7 +; X86-NOBMI2-NEXT: .LBB360_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB360_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB360_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB360_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB360_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB360_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB360_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB360_7 +; X86-BMI2-NEXT: .LBB360_5: +; 
X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB360_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btsq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB360_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB360_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btsq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB360_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB360_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl2_or_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_or_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB361_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB361_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB361_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB361_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB361_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 
4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB361_7 +; X86-NOBMI2-NEXT: .LBB361_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB361_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_or_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB361_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB361_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB361_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB361_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB361_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB361_7 +; X86-BMI2-NEXT: .LBB361_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB361_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_or_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB361_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB361_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: testq %rdx, %rax +; X64-NOBMI2-NEXT: je .LBB361_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB361_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; 
X64-BMI2-LABEL: atomic_shl2_or_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB361_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB361_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testq %rcx, %rax +; X64-BMI2-NEXT: je .LBB361_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB361_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_or_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB362_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB362_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB362_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB362_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB362_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB362_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB362_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: 
movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB362_9 +; X86-NOBMI2-NEXT: .LBB362_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB362_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB362_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB362_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB362_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB362_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB362_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB362_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB362_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-BMI2-NEXT: jmp .LBB362_9 +; X86-BMI2-NEXT: .LBB362_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB362_9: # %return +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; 
X64-NOBMI2-NEXT: .LBB362_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB362_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %edx +; X64-NOBMI2-NEXT: btq %rdx, %rax +; X64-NOBMI2-NEXT: jae .LBB362_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB362_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB362_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB362_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rsi), %ecx +; X64-BMI2-NEXT: movzbl %cl, %ecx +; X64-BMI2-NEXT: btq %rcx, %rax +; X64-BMI2-NEXT: jae .LBB362_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB362_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_small_mask_or_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: movl %ecx, %ebp +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB363_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB363_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %edi, %eax +; X86-NOBMI2-NEXT: je .LBB363_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%esi,%ebp,8), %eax +; X86-NOBMI2-NEXT: 
movl 4(%esi,%ebp,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB363_5 +; X86-NOBMI2-NEXT: .LBB363_3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB363_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: andl $31, %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edi, %eax, %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB363_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB363_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %ebp, %eax +; X86-BMI2-NEXT: je .LBB363_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edi,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%edi,8), %edx +; X86-BMI2-NEXT: jmp .LBB363_5 +; X86-BMI2-NEXT: .LBB363_3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB363_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $31, %esi +; X64-NOBMI2-NEXT: lock btsq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB363_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB363_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: lock btsq %rsi, (%rdi) +; X64-BMI2-NEXT: jae .LBB363_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB363_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %rem + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = 
phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask0_or_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB364_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB364_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB364_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB364_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB364_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB364_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB364_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB364_9 +; X86-NOBMI2-NEXT: .LBB364_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB364_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; 
X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB364_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB364_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB364_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB364_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB364_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB364_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB364_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB364_9 +; X86-BMI2-NEXT: .LBB364_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB364_9: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btsq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB364_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB364_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btsq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB364_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB364_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask1_or_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB365_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB365_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB365_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB365_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB365_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB365_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB365_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB365_9 +; X86-NOBMI2-NEXT: .LBB365_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB365_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB365_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB365_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB365_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This 
Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB365_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB365_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB365_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB365_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB365_9 +; X86-BMI2-NEXT: .LBB365_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB365_9: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btsq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB365_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB365_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btsq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB365_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB365_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask01_or_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; 
X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB366_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB366_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB366_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB366_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB366_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB366_7 +; X86-NOBMI2-NEXT: .LBB366_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB366_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB366_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB366_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB366_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB366_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB366_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB366_7 +; X86-BMI2-NEXT: .LBB366_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB366_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; 
X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_64_gpr_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btsq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB366_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB366_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btsq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB366_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB366_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_blsi_or_64_gpr_br(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_or_64_gpr_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl %ecx, %esi +; X86-NOBMI2-NEXT: negl %esi +; X86-NOBMI2-NEXT: sbbl %eax, %ebp +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB367_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB367_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB367_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB367_5 +; X86-NOBMI2-NEXT: .LBB367_3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB367_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_or_64_gpr_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: negl %esi +; X86-BMI2-NEXT: sbbl %eax, %ebp +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB367_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %esi, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %ebp, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB367_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %ebp, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB367_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB367_5 +; X86-BMI2-NEXT: .LBB367_3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB367_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_64_gpr_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB367_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB367_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testq %rcx, %rax +; X64-BMI2-NEXT: je .LBB367_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB367_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw or ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + %tobool.not = icmp eq i64 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, 
%entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_or_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB368_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB368_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB368_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB368_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB368_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB368_7 +; X86-NOBMI2-NEXT: .LBB368_6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: .LBB368_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB368_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB368_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB368_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; 
X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB368_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB368_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB368_7 +; X86-BMI2-NEXT: .LBB368_6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: .LBB368_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btsq %rax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB368_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB368_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btsq %rax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB368_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB368_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl2_or_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_or_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB369_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB369_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB369_3: # 
%atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB369_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB369_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB369_7 +; X86-NOBMI2-NEXT: .LBB369_6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: .LBB369_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_or_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB369_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB369_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB369_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB369_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB369_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB369_7 +; X86-BMI2-NEXT: .LBB369_6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: .LBB369_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_or_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %esi +; X64-NOBMI2-NEXT: shlq %cl, %rsi +; X64-NOBMI2-NEXT: movq (%rdi), 
%rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB369_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rdx +; X64-NOBMI2-NEXT: orq %rsi, %rdx +; X64-NOBMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB369_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %edx +; X64-NOBMI2-NEXT: testq %rsi, %rax +; X64-NOBMI2-NEXT: je .LBB369_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB369_3: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rdx +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_or_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB369_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: orq %rdx, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB369_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: je .LBB369_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB369_3: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rcx +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_or_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB370_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB370_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB370_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB370_3 +; X86-NOBMI2-NEXT: # %bb.4: # 
%atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB370_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB370_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: jne .LBB370_8 +; X86-NOBMI2-NEXT: # %bb.7: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-NOBMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-NOBMI2-NEXT: .LBB370_8: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB370_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB370_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB370_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB370_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB370_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB370_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: jne .LBB370_8 +; X86-BMI2-NEXT: # %bb.7: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl 
(%edi,%eax,8), %ecx +; X86-BMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-BMI2-NEXT: .LBB370_8: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB370_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB370_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %esi +; X64-NOBMI2-NEXT: movl $123, %edx +; X64-NOBMI2-NEXT: btq %rsi, %rax +; X64-NOBMI2-NEXT: jae .LBB370_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB370_3: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rdx +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB370_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB370_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rsi), %ecx +; X64-BMI2-NEXT: movzbl %cl, %edx +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: btq %rdx, %rax +; X64-BMI2-NEXT: jae .LBB370_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB370_3: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rcx +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_small_mask_or_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; 
X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: movl %ecx, %ebp +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB371_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB371_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %edi, %eax +; X86-NOBMI2-NEXT: je .LBB371_4 +; X86-NOBMI2-NEXT: # %bb.3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB371_5 +; X86-NOBMI2-NEXT: .LBB371_4: # %if.then +; X86-NOBMI2-NEXT: movl (%esi,%ebp,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ebp,8), %edx +; X86-NOBMI2-NEXT: .LBB371_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: andl $31, %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edi, %eax, %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB371_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB371_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %ebp, %eax +; X86-BMI2-NEXT: je .LBB371_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB371_5 +; X86-BMI2-NEXT: .LBB371_4: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edi,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%edi,8), %edx +; X86-BMI2-NEXT: .LBB371_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $31, %esi +; X64-NOBMI2-NEXT: lock btsq %rsi, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB371_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; 
X64-NOBMI2-NEXT: .LBB371_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: lock btsq %rsi, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB371_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB371_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %rem + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask0_or_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB372_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB372_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB372_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB372_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB372_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB372_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: jne .LBB372_8 +; X86-NOBMI2-NEXT: # %bb.7: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-NOBMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-NOBMI2-NEXT: .LBB372_8: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; 
X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB372_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB372_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB372_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB372_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB372_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB372_6: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: jne .LBB372_8 +; X86-BMI2-NEXT: # %bb.7: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-BMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-BMI2-NEXT: .LBB372_8: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btsq %rax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB372_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB372_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btsq %rax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB372_1 +; 
X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB372_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask1_or_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB373_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB373_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB373_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB373_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB373_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB373_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: jne .LBB373_8 +; X86-NOBMI2-NEXT: # %bb.7: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-NOBMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-NOBMI2-NEXT: .LBB373_8: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; 
X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB373_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB373_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB373_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB373_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB373_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB373_6: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: jne .LBB373_8 +; X86-BMI2-NEXT: # %bb.7: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-BMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-BMI2-NEXT: .LBB373_8: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btsq %rax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB373_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB373_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btsq %rax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB373_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB373_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + 
%tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask01_or_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB374_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB374_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB374_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB374_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB374_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB374_7 +; X86-NOBMI2-NEXT: .LBB374_6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: .LBB374_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB374_2 +; 
X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB374_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB374_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB374_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB374_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB374_7 +; X86-BMI2-NEXT: .LBB374_6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: .LBB374_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btsq %rax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB374_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB374_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btsq %rax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB374_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB374_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_blsi_or_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_or_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl %ecx, %esi +; 
X86-NOBMI2-NEXT: negl %esi +; X86-NOBMI2-NEXT: sbbl %eax, %ebp +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB375_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB375_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB375_4 +; X86-NOBMI2-NEXT: # %bb.3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB375_5 +; X86-NOBMI2-NEXT: .LBB375_4: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-NOBMI2-NEXT: .LBB375_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_or_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: negl %esi +; X86-BMI2-NEXT: sbbl %eax, %ebp +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB375_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %esi, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %ebp, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB375_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %ebp, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB375_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB375_5 +; X86-BMI2-NEXT: .LBB375_4: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: .LBB375_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: 
atomic_blsi_or_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB375_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: orq %rdx, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB375_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: je .LBB375_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB375_3: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rcx +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw or ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + %tobool.not = icmp eq i64 %and3, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_or_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_or_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB376_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB376_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB376_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB376_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB376_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB376_7 +; X86-NOBMI2-NEXT: .LBB376_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB376_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: 
popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB376_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB376_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB376_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB376_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB376_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB376_7 +; X86-BMI2-NEXT: .LBB376_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB376_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btsq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB376_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB376_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btsq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB376_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB376_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret 
i64 %retval.0 +} + +define i64 @atomic_shl2_or_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_or_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB377_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB377_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB377_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB377_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB377_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB377_7 +; X86-NOBMI2-NEXT: .LBB377_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB377_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_or_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB377_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB377_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB377_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: 
movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB377_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB377_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB377_7 +; X86-BMI2-NEXT: .LBB377_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB377_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_or_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB377_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB377_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: testq %rdx, %rax +; X64-NOBMI2-NEXT: je .LBB377_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB377_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_or_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB377_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB377_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testq %rcx, %rax +; X64-BMI2-NEXT: je .LBB377_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB377_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_or_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset 
%esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB378_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB378_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB378_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB378_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB378_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB378_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB378_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB378_9 +; X86-NOBMI2-NEXT: .LBB378_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB378_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB378_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB378_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB378_3: # %atomicrmw.start +; X86-BMI2-NEXT: # 
=>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB378_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB378_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB378_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB378_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-BMI2-NEXT: jmp .LBB378_9 +; X86-BMI2-NEXT: .LBB378_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB378_9: # %return +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB378_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: orq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB378_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %edx +; X64-NOBMI2-NEXT: btq %rdx, %rax +; X64-NOBMI2-NEXT: jae .LBB378_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB378_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $1, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB378_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB378_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rsi), %ecx +; X64-BMI2-NEXT: movzbl %cl, %ecx +; X64-BMI2-NEXT: btq %rcx, %rax +; X64-BMI2-NEXT: jae .LBB378_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB378_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br 
i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_small_mask_or_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: movl %ecx, %ebp +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB379_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB379_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %edi, %eax +; X86-NOBMI2-NEXT: je .LBB379_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%esi,%ebp,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ebp,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB379_5 +; X86-NOBMI2-NEXT: .LBB379_3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB379_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: andl $31, %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %edi, %eax, %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB379_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB379_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl 
%ebp, %eax +; X86-BMI2-NEXT: je .LBB379_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%esi,%edi,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%edi,8), %edx +; X86-BMI2-NEXT: jmp .LBB379_5 +; X86-BMI2-NEXT: .LBB379_3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB379_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $31, %esi +; X64-NOBMI2-NEXT: lock btsq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB379_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB379_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_or_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: lock btsq %rsi, (%rdi) +; X64-BMI2-NEXT: jae .LBB379_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB379_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %rem + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask0_or_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB380_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB380_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB380_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB380_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; 
X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB380_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB380_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB380_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB380_9 +; X86-NOBMI2-NEXT: .LBB380_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB380_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_or_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB380_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB380_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB380_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB380_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB380_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB380_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB380_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB380_9 +; X86-BMI2-NEXT: .LBB380_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB380_9: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; 
X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btsq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB380_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB380_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btsq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB380_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB380_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask1_or_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB381_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB381_2: # %entry +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB381_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB381_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB381_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB381_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; 
X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB381_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB381_9 +; X86-NOBMI2-NEXT: .LBB381_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB381_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_or_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB381_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB381_2: # %entry +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB381_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB381_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB381_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB381_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB381_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB381_9 +; X86-BMI2-NEXT: .LBB381_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB381_9: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btsq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB381_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; 
X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB381_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_or_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btsq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB381_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB381_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask01_or_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB382_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB382_2: # %entry +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB382_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB382_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB382_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB382_7 +; X86-NOBMI2-NEXT: .LBB382_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB382_7: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_or_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; 
X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB382_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB382_2: # %entry +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB382_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB382_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: andl %edi, %edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB382_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB382_7 +; X86-BMI2-NEXT: .LBB382_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB382_7: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btsq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB382_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB382_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_or_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btsq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB382_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB382_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %0 = atomicrmw or ptr %v, i64 %shl monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_blsi_or_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: 
atomic_blsi_or_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl %ecx, %esi +; X86-NOBMI2-NEXT: negl %esi +; X86-NOBMI2-NEXT: sbbl %eax, %ebp +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB383_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: orl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB383_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB383_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB383_5 +; X86-NOBMI2-NEXT: .LBB383_3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB383_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_or_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: negl %esi +; X86-BMI2-NEXT: sbbl %eax, %ebp +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB383_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl %esi, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: orl %ebp, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB383_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: andl %ebp, 
%edx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB383_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB383_5 +; X86-BMI2-NEXT: .LBB383_3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB383_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_or_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB383_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rdx +; X64-BMI2-NEXT: orq %rcx, %rdx +; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi) +; X64-BMI2-NEXT: jne .LBB383_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testq %rcx, %rax +; X64-BMI2-NEXT: je .LBB383_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB383_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %0 = atomicrmw or ptr %v, i64 %and monotonic, align 8 + %and3 = and i64 %0, %and + %tobool.not = icmp eq i64 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_and_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB384_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB384_2: # %entry +; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB384_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; 
X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB384_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB384_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB384_2: # %entry +; X86-BMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB384_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB384_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: 
retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl2_and_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_and_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB385_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB385_2: # %entry +; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB385_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB385_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_and_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB385_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB385_2: # %entry +; X86-BMI2-NEXT: 
movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB385_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB385_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_and_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq %rdx, %rcx +; X64-NOBMI2-NEXT: notq %rcx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB385_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rcx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB385_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_and_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq %rcx, %rdx +; X64-BMI2-NEXT: notq %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB385_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB385_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl1_neq_and_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: 
testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB386_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB386_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB386_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB386_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: shldl %cl, %esi, %edi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB386_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB386_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %edx +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB386_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB386_2: # %entry +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB386_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB386_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movl $1, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %edi, %esi +; X86-BMI2-NEXT: shlxl %ecx, %edi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB386_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB386_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: andl %edi, %eax +; X86-BMI2-NEXT: 
popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movq $-2, %rdx +; X64-NOBMI2-NEXT: rolq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB386_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB386_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: andq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: movq $-2, %rdx +; X64-BMI2-NEXT: rolq %cl, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB386_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB386_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %cl +; X64-BMI2-NEXT: movl $1, %edx +; X64-BMI2-NEXT: shlxq %rcx, %rdx, %rcx +; X64-BMI2-NEXT: andq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_small_mask_and_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: pushl %eax +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %eax +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: movl $-2, %ebp +; X86-NOBMI2-NEXT: roll %cl, %ebp +; X86-NOBMI2-NEXT: movl $-1, %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB387_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %ebp, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %esi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB387_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl (%esp), %eax # 4-byte 
Folded Reload +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: addl $4, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: movl $-2, %ebp +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %ebp +; X86-BMI2-NEXT: movl $-1, %edi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB387_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB387_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_shl1_mask0_and_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB388_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB388_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB388_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB388_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %esi, %edi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB388_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB388_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %edx +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB388_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB388_2: # %entry +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB388_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB388_3 +; X86-BMI2-NEXT: # %bb.4: # 
%atomicrmw.end +; X86-BMI2-NEXT: movl $1, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %edi, %esi +; X86-BMI2-NEXT: shlxl %ecx, %edi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB388_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB388_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: andl %edi, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_mask1_and_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB389_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB389_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB389_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB389_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %esi, %edi +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB389_6 +; 
X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB389_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %edi, %edx +; X86-NOBMI2-NEXT: andl %esi, %eax +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB389_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB389_2: # %entry +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB389_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB389_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %edi +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %edi, %esi +; X86-BMI2-NEXT: shlxl %ecx, %edi, %edi +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB389_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %edi, %esi +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: .LBB389_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %esi, %edx +; X86-BMI2-NEXT: andl %edi, %eax +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not 
monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + ret i64 %and +} + +define i64 @atomic_shl1_mask01_and_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB390_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB390_2: # %entry +; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB390_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB390_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB390_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB390_2: # %entry +; X86-BMI2-NEXT: movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB390_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB390_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $63, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrq %rcx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $63, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrq %rsi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + ret i64 %and +} + +define i64 @atomic_blsi_and_64_gpr_val(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_and_64_gpr_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: movl %eax, %ebp +; X86-NOBMI2-NEXT: negl %ebp +; X86-NOBMI2-NEXT: sbbl %ecx, %esi +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %esi +; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB391_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %ebp, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl 
%esi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB391_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_and_64_gpr_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl %eax, %ebp +; X86-BMI2-NEXT: negl %ebp +; X86-BMI2-NEXT: sbbl %ecx, %esi +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB391_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB391_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_64_gpr_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rcx +; X64-BMI2-NEXT: movq %rcx, %rdx +; X64-BMI2-NEXT: notq %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB391_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB391_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %not = xor i64 %and, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and3 = and i64 %0, %and + ret i64 %and3 +} + +define i64 
@atomic_shl1_and_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB392_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB392_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB392_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB392_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB392_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB392_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB392_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB392_2: # %entry +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB392_3: # 
%atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB392_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB392_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB392_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movq $-2, %rdx +; X64-NOBMI2-NEXT: rolq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB392_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB392_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: movq $-2, %rdx +; X64-BMI2-NEXT: rolq %cl, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB392_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB392_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %edx, %edx +; X64-BMI2-NEXT: btq %rcx, %rax +; X64-BMI2-NEXT: setae %dl +; X64-BMI2-NEXT: movq %rdx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %c + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl2_and_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_and_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: 
shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB393_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB393_2: # %entry +; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB393_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB393_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_and_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB393_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB393_2: # %entry +; X86-BMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB393_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB393_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl 
%edx, %edx +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_and_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq %rdx, %rcx +; X64-NOBMI2-NEXT: notq %rcx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB393_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rcx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB393_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testq %rdx, %rax +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_and_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rdx +; X64-BMI2-NEXT: movq %rdx, %rcx +; X64-BMI2-NEXT: notq %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB393_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rcx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB393_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + %conv = zext i1 %tobool.not to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_neq_and_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB394_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB394_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB394_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; 
X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB394_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB394_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB394_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB394_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB394_2: # %entry +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB394_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB394_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB394_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB394_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movq $-2, %rdx +; X64-NOBMI2-NEXT: rolq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 
0x90 +; X64-NOBMI2-NEXT: .LBB394_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB394_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movzbl %cl, %edx +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btq %rdx, %rax +; X64-NOBMI2-NEXT: setae %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: movq $-2, %rdx +; X64-BMI2-NEXT: rolq %cl, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB394_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB394_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %cl +; X64-BMI2-NEXT: movzbl %cl, %edx +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btq %rdx, %rax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %add = add i64 %c, 1 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %add + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_small_mask_and_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: pushl %eax +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: movl $-2, %ebp +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: roll %cl, %ebp +; X86-NOBMI2-NEXT: movl $-1, %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB395_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %ebp, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %esi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB395_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NOBMI2-NEXT: btl %edx, %eax +; X86-NOBMI2-NEXT: setae %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: addl $4, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 
+; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: movl $-2, %ebp +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %ebp +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB395_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB395_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-BMI2-NEXT: btl %edx, %eax +; X86-BMI2-NEXT: setae %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: movq $-2, %rdx +; X64-NOBMI2-NEXT: rolq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB395_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB395_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: andl $31, %ecx +; X64-BMI2-NEXT: movq $-2, %rdx +; X64-BMI2-NEXT: rolq %cl, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB395_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB395_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; 
X64-BMI2-NEXT: xorl %edx, %edx +; X64-BMI2-NEXT: btl %ecx, %eax +; X64-BMI2-NEXT: setae %dl +; X64-BMI2-NEXT: movq %rdx, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %rem + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask0_and_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB396_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB396_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB396_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB396_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB396_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB396_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: 
shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB396_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB396_2: # %entry +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB396_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB396_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB396_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB396_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movq $-2, %rdx +; X64-NOBMI2-NEXT: rolq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB396_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB396_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: movq $-2, %rdx +; X64-BMI2-NEXT: rolq %cl, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB396_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB396_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %edx, %edx +; X64-BMI2-NEXT: btq %rcx, %rax +; X64-BMI2-NEXT: setae %dl +; X64-BMI2-NEXT: movq %rdx, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %c + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask1_and_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; 
X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB397_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB397_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB397_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB397_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB397_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB397_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB397_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB397_2: # %entry +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB397_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB397_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 
X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB397_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB397_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movq $-2, %rdx +; X64-NOBMI2-NEXT: rolq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB397_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB397_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setae %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: movq $-2, %rdx +; X64-BMI2-NEXT: rolq %cl, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB397_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB397_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %edx, %edx +; X64-BMI2-NEXT: btq %rcx, %rax +; X64-BMI2-NEXT: setae %dl +; X64-BMI2-NEXT: movq %rdx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %rem = and i64 %c, 63 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %rem + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask01_and_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: pushl %eax +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $63, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB398_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB398_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %edi +; 
X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB398_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB398_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: notl %eax +; X86-NOBMI2-NEXT: notl %edx +; X86-NOBMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB398_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB398_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: addl $4, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $63, %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: je .LBB398_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB398_2: # %entry +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB398_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB398_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: notl %eax +; X86-BMI2-NEXT: notl %edx +; X86-BMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB398_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB398_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; 
X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl %ecx, %edx +; X64-NOBMI2-NEXT: andl $63, %edx +; X64-NOBMI2-NEXT: movq $-2, %rsi +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: rolq %cl, %rsi +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB398_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: andq %rsi, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB398_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btq %rdx, %rax +; X64-NOBMI2-NEXT: setae %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: movl %ecx, %edx +; X64-BMI2-NEXT: andl $63, %edx +; X64-BMI2-NEXT: movq $-2, %rsi +; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI2-NEXT: rolq %cl, %rsi +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB398_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: andq %rsi, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB398_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: btq %rdx, %rax +; X64-BMI2-NEXT: setae %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %1 = xor i64 %0, -1 + %2 = lshr i64 %1, %rem + %conv = and i64 %2, 1 + ret i64 %conv +} + +define i64 @atomic_blsi_and_64_gpr_valz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_and_64_gpr_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: movl %eax, %ebp +; X86-NOBMI2-NEXT: negl %ebp +; X86-NOBMI2-NEXT: sbbl %ecx, %esi +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %esi +; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB399_1: # %atomicrmw.start +; 
X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %ebp, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %esi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB399_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_and_64_gpr_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: movl %eax, %ebp +; X86-BMI2-NEXT: negl %ebp +; X86-BMI2-NEXT: sbbl %ecx, %esi +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB399_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB399_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_64_gpr_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rdx +; X64-BMI2-NEXT: movq %rdx, %rcx +; X64-BMI2-NEXT: notq %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB399_1: # 
%atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rcx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB399_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %not = xor i64 %and, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and3 = and i64 %0, %and + %tobool.not = icmp eq i64 %and3, 0 + %conv = zext i1 %tobool.not to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_and_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB400_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB400_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB400_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB400_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB400_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB400_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; 
X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB400_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB400_2: # %entry +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB400_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB400_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB400_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB400_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movq $-2, %rdx +; X64-NOBMI2-NEXT: rolq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB400_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB400_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: movq $-2, %rdx +; X64-BMI2-NEXT: rolq %cl, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB400_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB400_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rcx, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %1 = lshr i64 %0, %c + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl2_and_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_and_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB401_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB401_2: # %entry +; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB401_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB401_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: setne %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_and_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB401_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB401_2: # %entry +; X86-BMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB401_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; 
X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB401_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: setne %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_and_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq %rdx, %rcx +; X64-NOBMI2-NEXT: notq %rcx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB401_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rcx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB401_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testq %rdx, %rax +; X64-NOBMI2-NEXT: setne %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_and_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rdx +; X64-BMI2-NEXT: movq %rdx, %rcx +; X64-BMI2-NEXT: notq %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB401_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rcx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB401_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: setne %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool = icmp ne i64 %and, 0 + %conv = zext i1 %tobool to i64 + ret i64 %conv +} + +define i64 @atomic_shl1_neq_and_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll 
%cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB402_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB402_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB402_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB402_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB402_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB402_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB402_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB402_2: # %entry +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB402_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB402_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB402_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB402_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; 
X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movq $-2, %rdx +; X64-NOBMI2-NEXT: rolq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB402_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB402_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: incb %cl +; X64-NOBMI2-NEXT: movzbl %cl, %edx +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: btq %rdx, %rax +; X64-NOBMI2-NEXT: setb %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: movq $-2, %rdx +; X64-BMI2-NEXT: rolq %cl, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB402_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB402_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: incb %cl +; X64-BMI2-NEXT: shrxq %rcx, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %add = add i64 %c, 1 + %1 = lshr i64 %0, %add + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_small_mask_and_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: pushl %eax +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: movl $-2, %ebp +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: roll %cl, %ebp +; X86-NOBMI2-NEXT: movl $-1, %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB403_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %ebp, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %esi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB403_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-NOBMI2-NEXT: btl %edx, %eax +; X86-NOBMI2-NEXT: setb %cl +; 
X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: addl $4, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: pushl %eax +; X86-BMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: movl $-2, %ebp +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %ebp +; X86-BMI2-NEXT: movl $-1, %esi +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB403_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB403_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: addl $4, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: movq $-2, %rdx +; X64-NOBMI2-NEXT: rolq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB403_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB403_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btl %ecx, %eax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: andl $31, %ecx +; X64-BMI2-NEXT: movq $-2, %rdx +; X64-BMI2-NEXT: rolq %cl, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB403_1: # %atomicrmw.start +; 
X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB403_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rcx, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask0_and_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB404_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB404_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB404_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB404_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB404_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB404_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: 
movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB404_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB404_2: # %entry +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB404_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB404_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB404_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB404_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movq $-2, %rdx +; X64-NOBMI2-NEXT: rolq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB404_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB404_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: movq $-2, %rdx +; X64-BMI2-NEXT: rolq %cl, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB404_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB404_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rcx, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %1 = lshr i64 %0, %c + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask1_and_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset 
%esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB405_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB405_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB405_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB405_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: jne .LBB405_6 +; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: .LBB405_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $1, %edx +; X86-NOBMI2-NEXT: movl %edx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB405_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB405_2: # %entry +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB405_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB405_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB405_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: 
shrxl %ecx, %edx, %eax +; X86-BMI2-NEXT: .LBB405_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movq $-2, %rdx +; X64-NOBMI2-NEXT: rolq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB405_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB405_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %edx, %edx +; X64-NOBMI2-NEXT: btq %rcx, %rax +; X64-NOBMI2-NEXT: setb %dl +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: movq $-2, %rdx +; X64-BMI2-NEXT: rolq %cl, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB405_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB405_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrxq %rcx, %rax, %rax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %rem = and i64 %c, 63 + %1 = lshr i64 %0, %rem + %conv = and i64 %1, 1 + ret i64 %conv +} + +define i64 @atomic_shl1_mask01_and_64_gpr_valnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: pushl %eax +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 24 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $63, %ecx +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB406_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB406_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB406_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; 
X86-NOBMI2-NEXT: movl %eax, %ebx
+; X86-NOBMI2-NEXT: andl %edi, %ebx
+; X86-NOBMI2-NEXT: movl %edx, %ecx
+; X86-NOBMI2-NEXT: andl %ebp, %ecx
+; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT: jne .LBB406_3
+; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-NOBMI2-NEXT: shrdl %cl, %edx, %eax
+; X86-NOBMI2-NEXT: shrl %cl, %edx
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: jne .LBB406_6
+; X86-NOBMI2-NEXT: # %bb.5: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: .LBB406_6: # %atomicrmw.end
+; X86-NOBMI2-NEXT: andl $1, %edx
+; X86-NOBMI2-NEXT: movl %edx, %eax
+; X86-NOBMI2-NEXT: xorl %edx, %edx
+; X86-NOBMI2-NEXT: addl $4, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_valnz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: pushl %eax
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 24
+; X86-BMI2-NEXT: .cfi_offset %esi, -20
+; X86-BMI2-NEXT: .cfi_offset %edi, -16
+; X86-BMI2-NEXT: .cfi_offset %ebx, -12
+; X86-BMI2-NEXT: .cfi_offset %ebp, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: andl $63, %ecx
+; X86-BMI2-NEXT: movl $1, %eax
+; X86-BMI2-NEXT: xorl %edi, %edi
+; X86-BMI2-NEXT: shldl %cl, %eax, %edi
+; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-BMI2-NEXT: je .LBB406_2
+; X86-BMI2-NEXT: # %bb.1:
+; X86-BMI2-NEXT: movl %ebp, %edi
+; X86-BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI2-NEXT: .LBB406_2: # %entry
+; X86-BMI2-NEXT: notl %edi
+; X86-BMI2-NEXT: notl %ebp
+; X86-BMI2-NEXT: movl (%esi), %eax
+; X86-BMI2-NEXT: movl 4(%esi), %edx
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB406_3: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %ebx
+; X86-BMI2-NEXT: andl %ebp, %ebx
+; X86-BMI2-NEXT: movl %edx, %ecx
+; X86-BMI2-NEXT: andl %edi, %ecx
+; X86-BMI2-NEXT: lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT: jne .LBB406_3
+; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-BMI2-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB406_6
+; X86-BMI2-NEXT: # %bb.5:
+; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax
+; X86-BMI2-NEXT: .LBB406_6: # %atomicrmw.end
+; X86-BMI2-NEXT: andl $1, %eax
+; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: addl $4, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_valnz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
+; X64-NOBMI2-NEXT: movl %ecx, %edx
+; X64-NOBMI2-NEXT: andl $63, %edx
+; X64-NOBMI2-NEXT: movq $-2, %rsi
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI2-NEXT: rolq %cl, %rsi
+; X64-NOBMI2-NEXT: movq (%rdi), %rax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB406_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movq %rax, %rcx
+; X64-NOBMI2-NEXT: andq %rsi, %rcx
+; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT: jne .LBB406_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: xorl %ecx, %ecx
+; X64-NOBMI2-NEXT: btq %rdx, %rax
+; X64-NOBMI2-NEXT: setb %cl
+; X64-NOBMI2-NEXT: movq %rcx, %rax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_valnz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movq %rsi, %rcx
+; X64-BMI2-NEXT: movl %ecx, %edx
+; X64-BMI2-NEXT: andl $63, %edx
+; X64-BMI2-NEXT: movq $-2, %rsi
+; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-BMI2-NEXT: rolq %cl, %rsi
+; X64-BMI2-NEXT: movq (%rdi), %rax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB406_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movq %rax, %rcx
+; X64-BMI2-NEXT: andq %rsi, %rcx
+; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT: jne .LBB406_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: shrxq %rdx, %rax, %rax
+; X64-BMI2-NEXT: andl $1, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %rem = and i64 %c, 63
+ %shl = shl nuw i64 1, %rem
+ %not = xor i64 %shl, -1
+ %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
+ %1 = lshr i64 %0, %rem
+ %conv = and i64 %1, 1
+ ret i64 %conv
+}
+
+define i64 @atomic_blsi_and_64_gpr_valnz(ptr %v, i64 %c) {
+; X86-NOBMI2-LABEL: atomic_blsi_and_64_gpr_valnz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: subl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -20
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -16
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: xorl %esi, %esi
+; X86-NOBMI2-NEXT: movl %eax, %ebp
+; X86-NOBMI2-NEXT: negl %ebp
+; X86-NOBMI2-NEXT: sbbl %ecx, %esi
+; X86-NOBMI2-NEXT: andl %ecx, %esi
+; X86-NOBMI2-NEXT: andl %eax, %ebp
+; X86-NOBMI2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %esi
+; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %ebp
+; X86-NOBMI2-NEXT: movl (%edi), %eax
+; X86-NOBMI2-NEXT: movl 4(%edi), %edx
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB407_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ebx
+; X86-NOBMI2-NEXT: andl %ebp, %ebx
+; X86-NOBMI2-NEXT: movl %edx, %ecx
+; X86-NOBMI2-NEXT: andl %esi, %ecx
+; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi)
+; X86-NOBMI2-NEXT: jne .LBB407_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload
+; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NOBMI2-NEXT: xorl %ecx, %ecx
+; X86-NOBMI2-NEXT: orl %eax, %edx
+; X86-NOBMI2-NEXT: setne %cl
+; X86-NOBMI2-NEXT: movl %ecx, %eax
+; X86-NOBMI2-NEXT: xorl %edx, %edx
+; X86-NOBMI2-NEXT: addl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_blsi_and_64_gpr_valnz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: subl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-BMI2-NEXT: .cfi_offset %esi, -20
+; X86-BMI2-NEXT: .cfi_offset %edi, -16
+; X86-BMI2-NEXT: .cfi_offset %ebx, -12
+; X86-BMI2-NEXT: .cfi_offset %ebp, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: xorl %esi, %esi
+; X86-BMI2-NEXT: movl %eax, %ebp
+; X86-BMI2-NEXT: negl %ebp
+; X86-BMI2-NEXT: sbbl %ecx, %esi
+; X86-BMI2-NEXT: andl %ecx, %esi
+; X86-BMI2-NEXT: andl %eax, %ebp
+; X86-BMI2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-BMI2-NEXT: notl %esi
+; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-BMI2-NEXT: notl %ebp
+; X86-BMI2-NEXT: movl (%edi), %eax
+; X86-BMI2-NEXT: movl 4(%edi), %edx
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB407_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %ebx
+; X86-BMI2-NEXT: andl %ebp, %ebx
+; X86-BMI2-NEXT: movl %edx, %ecx
+; X86-BMI2-NEXT: andl %esi, %ecx
+; X86-BMI2-NEXT: lock cmpxchg8b (%edi)
+; X86-BMI2-NEXT: jne .LBB407_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload
+; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-BMI2-NEXT: xorl %ecx, %ecx
+; X86-BMI2-NEXT: orl %eax, %edx
+; X86-BMI2-NEXT: setne %cl
+; X86-BMI2-NEXT: movl %ecx, %eax
+; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: addl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-BMI2-LABEL: atomic_blsi_and_64_gpr_valnz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: blsiq %rsi, %rdx
+; X64-BMI2-NEXT: movq %rdx, %rcx
+; X64-BMI2-NEXT: notq %rcx
+; X64-BMI2-NEXT: movq (%rdi), %rax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB407_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movq %rax, %rsi
+; X64-BMI2-NEXT: andq %rcx, %rsi
+; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi)
+; X64-BMI2-NEXT: jne .LBB407_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: xorl %ecx, %ecx
+; X64-BMI2-NEXT: testq %rdx, %rax
+; X64-BMI2-NEXT: setne %cl
+; X64-BMI2-NEXT: movq %rcx, %rax
+; X64-BMI2-NEXT: retq
+entry:
+ %sub = sub i64 0, %c
+ %and = and i64 %sub, %c
+ %not = xor i64 %and, -1
+ %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
+ %and3 = and i64 %0, %and
+ %tobool = icmp ne i64 %and3, 0
+ %conv = zext i1 %tobool to i64
+ ret i64 %conv
+}
+
+define i64 @atomic_shl1_and_64_gpr_br(ptr %v, i64 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_and_64_gpr_br:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: subl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -20
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -16
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl $1, %edi
+; X86-NOBMI2-NEXT: xorl %ebp, %ebp
+; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp
+; X86-NOBMI2-NEXT: shll %cl, %edi
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB408_2
+; X86-NOBMI2-NEXT: # %bb.1:
+; X86-NOBMI2-NEXT: movl %edi, %ebp
+; X86-NOBMI2-NEXT: xorl %edi, %edi
+; X86-NOBMI2-NEXT: .LBB408_2: # %entry
+; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %ebp
+; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %edi
+; X86-NOBMI2-NEXT: movl (%esi), %eax
+; X86-NOBMI2-NEXT: movl 4(%esi), %edx
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB408_3: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ebx
+; X86-NOBMI2-NEXT: andl %edi, %ebx
+; X86-NOBMI2-NEXT: movl %edx, %ecx
+; X86-NOBMI2-NEXT: andl %ebp, %ecx
+; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT: jne .LBB408_3
+; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload
+; X86-NOBMI2-NEXT: orl %eax, %edx
+; X86-NOBMI2-NEXT: je .LBB408_5
+; X86-NOBMI2-NEXT: # %bb.6: # %if.then
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax
+; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx
+; X86-NOBMI2-NEXT: jmp .LBB408_7
+; X86-NOBMI2-NEXT: .LBB408_5:
+; X86-NOBMI2-NEXT: xorl %edx, %edx
+; X86-NOBMI2-NEXT: movl $123, %eax
+; X86-NOBMI2-NEXT: .LBB408_7: # %return
+; X86-NOBMI2-NEXT: addl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_and_64_gpr_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: subl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-BMI2-NEXT: .cfi_offset %esi, -20
+; X86-BMI2-NEXT: .cfi_offset %edi, -16
+; X86-BMI2-NEXT: .cfi_offset %ebx, -12
+; X86-BMI2-NEXT: .cfi_offset %ebp, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT: movl $1, %eax
+; X86-BMI2-NEXT: xorl %edi, %edi
+; X86-BMI2-NEXT: shldl %cl, %eax, %edi
+; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB408_2
+; X86-BMI2-NEXT: # %bb.1:
+; X86-BMI2-NEXT: movl %ebp, %edi
+; X86-BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI2-NEXT: .LBB408_2: # %entry
+; X86-BMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-BMI2-NEXT: notl %edi
+; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-BMI2-NEXT: notl %ebp
+; X86-BMI2-NEXT: movl (%esi), %eax
+; X86-BMI2-NEXT: movl 4(%esi), %edx
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB408_3: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %ebx
+; X86-BMI2-NEXT: andl %ebp, %ebx
+; X86-BMI2-NEXT: movl %edx, %ecx
+; X86-BMI2-NEXT: andl %edi, %ecx
+; X86-BMI2-NEXT: lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT: jne .LBB408_3
+; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload
+; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-BMI2-NEXT: orl %eax, %edx
+; X86-BMI2-NEXT: je .LBB408_5
+; X86-BMI2-NEXT: # %bb.6: # %if.then
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax
+; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx
+; X86-BMI2-NEXT: jmp .LBB408_7
+; X86-BMI2-NEXT: .LBB408_5:
+; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: movl $123, %eax
+; X86-BMI2-NEXT: .LBB408_7: # %return
+; X86-BMI2-NEXT: addl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_and_64_gpr_br:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %eax
+; X64-NOBMI2-NEXT: andl $63, %eax
+; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB408_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB408_1:
+; X64-NOBMI2-NEXT: movl $123, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_and_64_gpr_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andl $63, %eax
+; X64-BMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-BMI2-NEXT: jae .LBB408_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB408_1:
+; X64-BMI2-NEXT: movl $123, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %shl = shl nuw i64 1, %c
+ %not = xor i64 %shl, -1
+ %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
+ %and = and i64 %0, %shl
+ %tobool.not = icmp eq i64 %and, 0
+ br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c
+ %1 = load i64, ptr %arrayidx, align 8
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+ ret i64 %retval.0
+}
+
+define i64 @atomic_shl2_and_64_gpr_br(ptr %v, i64 %c) {
+; X86-NOBMI2-LABEL: atomic_shl2_and_64_gpr_br:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: subl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -20
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -16
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl $2, %edi
+; X86-NOBMI2-NEXT: xorl %ebp, %ebp
+; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp
+; X86-NOBMI2-NEXT: shll %cl, %edi
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB409_2
+; X86-NOBMI2-NEXT: # %bb.1:
+; X86-NOBMI2-NEXT: movl %edi, %ebp
+; X86-NOBMI2-NEXT: xorl %edi, %edi
+; X86-NOBMI2-NEXT: .LBB409_2: # %entry
+; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %ebp
+; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %edi
+; X86-NOBMI2-NEXT: movl (%esi), %eax
+; X86-NOBMI2-NEXT: movl 4(%esi), %edx
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB409_3: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ebx
+; X86-NOBMI2-NEXT: andl %edi, %ebx
+; X86-NOBMI2-NEXT: movl %edx, %ecx
+; X86-NOBMI2-NEXT: andl %ebp, %ecx
+; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT: jne .LBB409_3
+; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload
+; X86-NOBMI2-NEXT: orl %eax, %edx
+; X86-NOBMI2-NEXT: je .LBB409_5
+; X86-NOBMI2-NEXT: # %bb.6: # %if.then
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax
+; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx
+; X86-NOBMI2-NEXT: jmp .LBB409_7
+; X86-NOBMI2-NEXT: .LBB409_5:
+; X86-NOBMI2-NEXT: xorl %edx, %edx
+; X86-NOBMI2-NEXT: movl $123, %eax
+; X86-NOBMI2-NEXT: .LBB409_7: # %return
+; X86-NOBMI2-NEXT: addl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl2_and_64_gpr_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: subl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-BMI2-NEXT: .cfi_offset %esi, -20
+; X86-BMI2-NEXT: .cfi_offset %edi, -16
+; X86-BMI2-NEXT: .cfi_offset %ebx, -12
+; X86-BMI2-NEXT: .cfi_offset %ebp, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT: movl $2, %eax
+; X86-BMI2-NEXT: xorl %edi, %edi
+; X86-BMI2-NEXT: shldl %cl, %eax, %edi
+; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB409_2
+; X86-BMI2-NEXT: # %bb.1:
+; X86-BMI2-NEXT: movl %ebp, %edi
+; X86-BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI2-NEXT: .LBB409_2: # %entry
+; X86-BMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-BMI2-NEXT: notl %edi
+; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-BMI2-NEXT: notl %ebp
+; X86-BMI2-NEXT: movl (%esi), %eax
+; X86-BMI2-NEXT: movl 4(%esi), %edx
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB409_3: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %ebx
+; X86-BMI2-NEXT: andl %ebp, %ebx
+; X86-BMI2-NEXT: movl %edx, %ecx
+; X86-BMI2-NEXT: andl %edi, %ecx
+; X86-BMI2-NEXT: lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT: jne .LBB409_3
+; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload
+; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-BMI2-NEXT: orl %eax, %edx
+; X86-BMI2-NEXT: je .LBB409_5
+; X86-BMI2-NEXT: # %bb.6: # %if.then
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax
+; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx
+; X86-BMI2-NEXT: jmp .LBB409_7
+; X86-BMI2-NEXT: .LBB409_5:
+; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: movl $123, %eax
+; X86-BMI2-NEXT: .LBB409_7: # %return
+; X86-BMI2-NEXT: addl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl2_and_64_gpr_br:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
+; X64-NOBMI2-NEXT: movl $2, %edx
+; X64-NOBMI2-NEXT: shlq %cl, %rdx
+; X64-NOBMI2-NEXT: movq %rdx, %rsi
+; X64-NOBMI2-NEXT: notq %rsi
+; X64-NOBMI2-NEXT: movq (%rdi), %rax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB409_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movq %rax, %r8
+; X64-NOBMI2-NEXT: andq %rsi, %r8
+; X64-NOBMI2-NEXT: lock cmpxchgq %r8, (%rdi)
+; X64-NOBMI2-NEXT: jne .LBB409_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: testq %rdx, %rax
+; X64-NOBMI2-NEXT: je .LBB409_3
+; X64-NOBMI2-NEXT: # %bb.4: # %if.then
+; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB409_3:
+; X64-NOBMI2-NEXT: movl $123, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl2_and_64_gpr_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl $2, %eax
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx
+; X64-BMI2-NEXT: movq %rcx, %rdx
+; X64-BMI2-NEXT: notq %rdx
+; X64-BMI2-NEXT: movq (%rdi), %rax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB409_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movq %rax, %r8
+; X64-BMI2-NEXT: andq %rdx, %r8
+; X64-BMI2-NEXT: lock cmpxchgq %r8, (%rdi)
+; X64-BMI2-NEXT: jne .LBB409_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: testq %rcx, %rax
+; X64-BMI2-NEXT: je .LBB409_3
+; X64-BMI2-NEXT: # %bb.4: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB409_3:
+; X64-BMI2-NEXT: movl $123, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %shl = shl i64 2, %c
+ %not = xor i64 %shl, -1
+ %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
+ %and = and i64 %0, %shl
+ %tobool.not = icmp eq i64 %and, 0
+ br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c
+ %1 = load i64, ptr %arrayidx, align 8
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+ ret i64 %retval.0
+}
+
+define i64 @atomic_shl1_neq_and_64_gpr_br(ptr %v, i64 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_and_64_gpr_br:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -20
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -16
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI2-NEXT: movl $1, %esi
+; X86-NOBMI2-NEXT: xorl %ebp, %ebp
+; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp
+; X86-NOBMI2-NEXT: shll %cl, %esi
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB410_2
+; X86-NOBMI2-NEXT: # %bb.1:
+; X86-NOBMI2-NEXT: movl %esi, %ebp
+; X86-NOBMI2-NEXT: xorl %esi, %esi
+; X86-NOBMI2-NEXT: .LBB410_2: # %entry
+; X86-NOBMI2-NEXT: notl %ebp
+; X86-NOBMI2-NEXT: notl %esi
+; X86-NOBMI2-NEXT: movl (%edi), %eax
+; X86-NOBMI2-NEXT: movl 4(%edi), %edx
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB410_3: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ebx
+; X86-NOBMI2-NEXT: andl %esi, %ebx
+; X86-NOBMI2-NEXT: movl %edx, %ecx
+; X86-NOBMI2-NEXT: andl %ebp, %ecx
+; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi)
+; X86-NOBMI2-NEXT: jne .LBB410_3
+; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT: incb %cl
+; X86-NOBMI2-NEXT: movl $1, %ebx
+; X86-NOBMI2-NEXT: xorl %esi, %esi
+; X86-NOBMI2-NEXT: xorl %ebp, %ebp
+; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp
+; X86-NOBMI2-NEXT: shll %cl, %ebx
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB410_6
+; X86-NOBMI2-NEXT: # %bb.5:
+; X86-NOBMI2-NEXT: movl %ebx, %ebp
+; X86-NOBMI2-NEXT: xorl %ebx, %ebx
+; X86-NOBMI2-NEXT: .LBB410_6: # %atomicrmw.end
+; X86-NOBMI2-NEXT: andl %ebp, %edx
+; X86-NOBMI2-NEXT: andl %ebx, %eax
+; X86-NOBMI2-NEXT: orl %edx, %eax
+; X86-NOBMI2-NEXT: je .LBB410_7
+; X86-NOBMI2-NEXT: # %bb.8: # %if.then
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax
+; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi
+; X86-NOBMI2-NEXT: jmp .LBB410_9
+; X86-NOBMI2-NEXT: .LBB410_7:
+; X86-NOBMI2-NEXT: movl $123, %eax
+; X86-NOBMI2-NEXT: .LBB410_9: # %return
+; X86-NOBMI2-NEXT: movl %esi, %edx
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_and_64_gpr_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: .cfi_offset %esi, -20
+; X86-BMI2-NEXT: .cfi_offset %edi, -16
+; X86-BMI2-NEXT: .cfi_offset %ebx, -12
+; X86-BMI2-NEXT: .cfi_offset %ebp, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT: movl $1, %eax
+; X86-BMI2-NEXT: xorl %esi, %esi
+; X86-BMI2-NEXT: shldl %cl, %eax, %esi
+; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB410_2
+; X86-BMI2-NEXT: # %bb.1:
+; X86-BMI2-NEXT: movl %ebp, %esi
+; X86-BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI2-NEXT: .LBB410_2: # %entry
+; X86-BMI2-NEXT: notl %esi
+; X86-BMI2-NEXT: notl %ebp
+; X86-BMI2-NEXT: movl (%edi), %eax
+; X86-BMI2-NEXT: movl 4(%edi), %edx
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB410_3: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %ebx
+; X86-BMI2-NEXT: andl %ebp, %ebx
+; X86-BMI2-NEXT: movl %edx, %ecx
+; X86-BMI2-NEXT: andl %esi, %ecx
+; X86-BMI2-NEXT: lock cmpxchg8b (%edi)
+; X86-BMI2-NEXT: jne .LBB410_3
+; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx
+; X86-BMI2-NEXT: incb %cl
+; X86-BMI2-NEXT: movl $1, %ebp
+; X86-BMI2-NEXT: xorl %esi, %esi
+; X86-BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx
+; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB410_6
+; X86-BMI2-NEXT: # %bb.5:
+; X86-BMI2-NEXT: movl %ebp, %ebx
+; X86-BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI2-NEXT: .LBB410_6: # %atomicrmw.end
+; X86-BMI2-NEXT: andl %ebx, %edx
+; X86-BMI2-NEXT: andl %ebp, %eax
+; X86-BMI2-NEXT: orl %edx, %eax
+; X86-BMI2-NEXT: je .LBB410_7
+; X86-BMI2-NEXT: # %bb.8: # %if.then
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax
+; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %esi
+; X86-BMI2-NEXT: jmp .LBB410_9
+; X86-BMI2-NEXT: .LBB410_7:
+; X86-BMI2-NEXT: movl $123, %eax
+; X86-BMI2-NEXT: .LBB410_9: # %return
+; X86-BMI2-NEXT: movl %esi, %edx
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_and_64_gpr_br:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
+; X64-NOBMI2-NEXT: movq $-2, %rdx
+; X64-NOBMI2-NEXT: rolq %cl, %rdx
+; X64-NOBMI2-NEXT: movq (%rdi), %rax
+; X64-NOBMI2-NEXT: .p2align 4, 0x90
+; X64-NOBMI2-NEXT: .LBB410_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT: movq %rax, %rsi
+; X64-NOBMI2-NEXT: andq %rdx, %rsi
+; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi)
+; X64-NOBMI2-NEXT: jne .LBB410_1
+; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT: leal 1(%rcx), %edx
+; X64-NOBMI2-NEXT: movzbl %dl, %edx
+; X64-NOBMI2-NEXT: btq %rdx, %rax
+; X64-NOBMI2-NEXT: jae .LBB410_3
+; X64-NOBMI2-NEXT: # %bb.4: # %if.then
+; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB410_3:
+; X64-NOBMI2-NEXT: movl $123, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_and_64_gpr_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movq %rsi, %rcx
+; X64-BMI2-NEXT: movq $-2, %rdx
+; X64-BMI2-NEXT: rolq %cl, %rdx
+; X64-BMI2-NEXT: movq (%rdi), %rax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB410_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movq %rax, %rsi
+; X64-BMI2-NEXT: andq %rdx, %rsi
+; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi)
+; X64-BMI2-NEXT: jne .LBB410_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: leal 1(%rcx), %edx
+; X64-BMI2-NEXT: movzbl %dl, %edx
+; X64-BMI2-NEXT: btq %rdx, %rax
+; X64-BMI2-NEXT: jae .LBB410_3
+; X64-BMI2-NEXT: # %bb.4: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rcx,8), %rax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB410_3:
+; X64-BMI2-NEXT: movl $123, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %shl = shl nuw i64 1, %c
+ %not = xor i64 %shl, -1
+ %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
+ %add = add i64 %c, 1
+ %shl1 = shl nuw i64 1, %add
+ %and = and i64 %0, %shl1
+ %tobool.not = icmp eq i64 %and, 0
+ br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c
+ %1 = load i64, ptr %arrayidx, align 8
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+ ret i64 %retval.0
+}
+
+define i64 @atomic_shl1_small_mask_and_64_gpr_br(ptr %v, i64 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_br:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: subl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -20
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -16
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: andl $31, %ecx
+; X86-NOBMI2-NEXT: movl $1, %eax
+; X86-NOBMI2-NEXT: shll %cl, %eax
+; X86-NOBMI2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOBMI2-NEXT: movl $-2, %ebp
+; X86-NOBMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI2-NEXT: roll %cl, %ebp
+; X86-NOBMI2-NEXT: movl $-1, %edi
+; X86-NOBMI2-NEXT: movl (%esi), %eax
+; X86-NOBMI2-NEXT: movl 4(%esi), %edx
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB411_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ebx
+; X86-NOBMI2-NEXT: andl %ebp, %ebx
+; X86-NOBMI2-NEXT: movl %edx, %ecx
+; X86-NOBMI2-NEXT: andl %edi, %ecx
+; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT: jne .LBB411_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: testl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NOBMI2-NEXT: je .LBB411_3
+; X86-NOBMI2-NEXT: # %bb.4: # %if.then
+; X86-NOBMI2-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax
+; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx
+; X86-NOBMI2-NEXT: jmp .LBB411_5
+; X86-NOBMI2-NEXT: .LBB411_3:
+; X86-NOBMI2-NEXT: xorl %edx, %edx
+; X86-NOBMI2-NEXT: movl $123, %eax
+; X86-NOBMI2-NEXT: .LBB411_5: # %return
+; X86-NOBMI2-NEXT: addl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: subl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-BMI2-NEXT: .cfi_offset %esi, -20
+; X86-BMI2-NEXT: .cfi_offset %edi, -16
+; X86-BMI2-NEXT: .cfi_offset %ebx, -12
+; X86-BMI2-NEXT: .cfi_offset %ebp, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: andl $31, %ecx
+; X86-BMI2-NEXT: movl $1, %eax
+; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-BMI2-NEXT: movl $-2, %ebp
+; X86-BMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-BMI2-NEXT: roll %cl, %ebp
+; X86-BMI2-NEXT: movl $-1, %edi
+; X86-BMI2-NEXT: movl (%esi), %eax
+; X86-BMI2-NEXT: movl 4(%esi), %edx
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB411_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %ebx
+; X86-BMI2-NEXT: andl %ebp, %ebx
+; X86-BMI2-NEXT: movl %edx, %ecx
+; X86-BMI2-NEXT: andl %edi, %ecx
+; X86-BMI2-NEXT: lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT: jne .LBB411_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: testl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-BMI2-NEXT: je .LBB411_3
+; X86-BMI2-NEXT: # %bb.4: # %if.then
+; X86-BMI2-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax
+; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx
+; X86-BMI2-NEXT: jmp .LBB411_5
+; X86-BMI2-NEXT: .LBB411_3:
+; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: movl $123, %eax
+; X86-BMI2-NEXT: .LBB411_5: # %return
+; X86-BMI2-NEXT: addl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_br:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: andl $31, %esi
+; X64-NOBMI2-NEXT: lock btrq %rsi, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB411_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB411_1:
+; X64-NOBMI2-NEXT: movl $123, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: andl $31, %esi
+; X64-BMI2-NEXT: lock btrq %rsi, (%rdi)
+; X64-BMI2-NEXT: jae .LBB411_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB411_1:
+; X64-BMI2-NEXT: movl $123, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %rem = and i64 %c, 31
+ %shl = shl nuw nsw i64 1, %rem
+ %not = xor i64 %shl, -1
+ %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
+ %and = and i64 %0, %shl
+ %tobool.not = icmp eq i64 %and, 0
+ br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i64, ptr %v, i64 %rem
+ %1 = load i64, ptr %arrayidx, align 8
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+ ret i64 %retval.0
+}
+
+define i64 @atomic_shl1_mask0_and_64_gpr_br(ptr %v, i64 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_br:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -20
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -16
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI2-NEXT: movl $1, %esi
+; X86-NOBMI2-NEXT: xorl %ebp, %ebp
+; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp
+; X86-NOBMI2-NEXT: shll %cl, %esi
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB412_2
+; X86-NOBMI2-NEXT: # %bb.1:
+; X86-NOBMI2-NEXT: movl %esi, %ebp
+; X86-NOBMI2-NEXT: xorl %esi, %esi
+; X86-NOBMI2-NEXT: .LBB412_2: # %entry
+; X86-NOBMI2-NEXT: notl %ebp
+; X86-NOBMI2-NEXT: notl %esi
+; X86-NOBMI2-NEXT: movl (%edi), %eax
+; X86-NOBMI2-NEXT: movl 4(%edi), %edx
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB412_3: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ebx
+; X86-NOBMI2-NEXT: andl %esi, %ebx
+; X86-NOBMI2-NEXT: movl %edx, %ecx
+; X86-NOBMI2-NEXT: andl %ebp, %ecx
+; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi)
+; X86-NOBMI2-NEXT: jne .LBB412_3
+; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movl $1, %ebx
+; X86-NOBMI2-NEXT: xorl %esi, %esi
+; X86-NOBMI2-NEXT: xorl %ebp, %ebp
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp
+; X86-NOBMI2-NEXT: shll %cl, %ebx
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB412_6
+; X86-NOBMI2-NEXT: # %bb.5:
+; X86-NOBMI2-NEXT: movl %ebx, %ebp
+; X86-NOBMI2-NEXT: xorl %ebx, %ebx
+; X86-NOBMI2-NEXT: .LBB412_6: # %atomicrmw.end
+; X86-NOBMI2-NEXT: andl %ebp, %edx
+; X86-NOBMI2-NEXT: andl %ebx, %eax
+; X86-NOBMI2-NEXT: orl %edx, %eax
+; X86-NOBMI2-NEXT: je .LBB412_7
+; X86-NOBMI2-NEXT: # %bb.8: # %if.then
+; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax
+; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi
+; X86-NOBMI2-NEXT: jmp .LBB412_9
+; X86-NOBMI2-NEXT: .LBB412_7:
+; X86-NOBMI2-NEXT: movl $123, %eax
+; X86-NOBMI2-NEXT: .LBB412_9: # %return
+; X86-NOBMI2-NEXT: movl %esi, %edx
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: .cfi_offset %esi, -20
+; X86-BMI2-NEXT: .cfi_offset %edi, -16
+; X86-BMI2-NEXT: .cfi_offset %ebx, -12
+; X86-BMI2-NEXT: .cfi_offset %ebp, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT: movl $1, %eax
+; X86-BMI2-NEXT: xorl %esi, %esi
+; X86-BMI2-NEXT: shldl %cl, %eax, %esi
+; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB412_2
+; X86-BMI2-NEXT: # %bb.1:
+; X86-BMI2-NEXT: movl %ebp, %esi
+; X86-BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI2-NEXT: .LBB412_2: # %entry
+; X86-BMI2-NEXT: notl %esi
+; X86-BMI2-NEXT: notl %ebp
+; X86-BMI2-NEXT: movl (%edi), %eax
+; X86-BMI2-NEXT: movl 4(%edi), %edx
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB412_3: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %ebx
+; X86-BMI2-NEXT: andl %ebp, %ebx
+; X86-BMI2-NEXT: movl %edx, %ecx
+; X86-BMI2-NEXT: andl %esi, %ecx
+; X86-BMI2-NEXT: lock cmpxchg8b (%edi)
+; X86-BMI2-NEXT: jne .LBB412_3
+; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-BMI2-NEXT: movl $1, %ebp
+; X86-BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx
+; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB412_6
+; X86-BMI2-NEXT: # %bb.5:
+; X86-BMI2-NEXT: movl %ebp, %ebx
+; X86-BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI2-NEXT: .LBB412_6: # %atomicrmw.end
+; X86-BMI2-NEXT: andl %ebx, %edx
+; X86-BMI2-NEXT: andl %ebp, %eax
+; X86-BMI2-NEXT: orl %edx, %eax
+; X86-BMI2-NEXT: je .LBB412_7
+; X86-BMI2-NEXT: # %bb.8: # %if.then
+; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax
+; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx
+; X86-BMI2-NEXT: jmp .LBB412_9
+; X86-BMI2-NEXT: .LBB412_7:
+; X86-BMI2-NEXT: movl $123, %eax
+; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: .LBB412_9: # %return
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_br:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %eax
+; X64-NOBMI2-NEXT: andl $63, %eax
+; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB412_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB412_1:
+; X64-NOBMI2-NEXT: movl $123, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andl $63, %eax
+; X64-BMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-BMI2-NEXT: jae .LBB412_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB412_1:
+; X64-BMI2-NEXT: movl $123, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %rem = and i64 %c, 63
+ %shl = shl nuw i64 1, %rem
+ %not = xor i64 %shl, -1
+ %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
+ %shl1 = shl nuw i64 1, %c
+ %and = and i64 %0, %shl1
+ %tobool.not = icmp eq i64 %and, 0
+ br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c
+ %1 = load i64, ptr %arrayidx, align 8
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+ ret i64 %retval.0
+}
+
+define i64 @atomic_shl1_mask1_and_64_gpr_br(ptr %v, i64 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_br:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -20
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -16
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI2-NEXT: movl $1, %esi
+; X86-NOBMI2-NEXT: xorl %ebp, %ebp
+; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp
+; X86-NOBMI2-NEXT: shll %cl, %esi
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB413_2
+; X86-NOBMI2-NEXT: # %bb.1:
+; X86-NOBMI2-NEXT: movl %esi, %ebp
+; X86-NOBMI2-NEXT: xorl %esi, %esi
+; X86-NOBMI2-NEXT: .LBB413_2: # %entry
+; X86-NOBMI2-NEXT: notl %ebp
+; X86-NOBMI2-NEXT: notl %esi
+; X86-NOBMI2-NEXT: movl (%edi), %eax
+; X86-NOBMI2-NEXT: movl 4(%edi), %edx
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB413_3: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ebx
+; X86-NOBMI2-NEXT: andl %esi, %ebx
+; X86-NOBMI2-NEXT: movl %edx, %ecx
+; X86-NOBMI2-NEXT: andl %ebp, %ecx
+; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi)
+; X86-NOBMI2-NEXT: jne .LBB413_3
+; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-NOBMI2-NEXT: movl $1, %ebx
+; X86-NOBMI2-NEXT: xorl %esi, %esi
+; X86-NOBMI2-NEXT: xorl %ebp, %ebp
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp
+; X86-NOBMI2-NEXT: shll %cl, %ebx
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB413_6
+; X86-NOBMI2-NEXT: # %bb.5:
+; X86-NOBMI2-NEXT: movl %ebx, %ebp
+; X86-NOBMI2-NEXT: xorl %ebx, %ebx
+; X86-NOBMI2-NEXT: .LBB413_6: # %atomicrmw.end
+; X86-NOBMI2-NEXT: andl %ebp, %edx
+; X86-NOBMI2-NEXT: andl %ebx, %eax
+; X86-NOBMI2-NEXT: orl %edx, %eax
+; X86-NOBMI2-NEXT: je .LBB413_7
+; X86-NOBMI2-NEXT: # %bb.8: # %if.then
+; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax
+; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi
+; X86-NOBMI2-NEXT: jmp .LBB413_9
+; X86-NOBMI2-NEXT: .LBB413_7:
+; X86-NOBMI2-NEXT: movl $123, %eax
+; X86-NOBMI2-NEXT: .LBB413_9: # %return
+; X86-NOBMI2-NEXT: movl %esi, %edx
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: .cfi_offset %esi, -20
+; X86-BMI2-NEXT: .cfi_offset %edi, -16
+; X86-BMI2-NEXT: .cfi_offset %ebx, -12
+; X86-BMI2-NEXT: .cfi_offset %ebp, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT: movl $1, %eax
+; X86-BMI2-NEXT: xorl %esi, %esi
+; X86-BMI2-NEXT: shldl %cl, %eax, %esi
+; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB413_2
+; X86-BMI2-NEXT: # %bb.1:
+; X86-BMI2-NEXT: movl %ebp, %esi
+; X86-BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI2-NEXT: .LBB413_2: # %entry
+; X86-BMI2-NEXT: notl %esi
+; X86-BMI2-NEXT: notl %ebp
+; X86-BMI2-NEXT: movl (%edi), %eax
+; X86-BMI2-NEXT: movl 4(%edi), %edx
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB413_3: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %ebx
+; X86-BMI2-NEXT: andl %ebp, %ebx
+; X86-BMI2-NEXT: movl %edx, %ecx
+; X86-BMI2-NEXT: andl %esi, %ecx
+; X86-BMI2-NEXT: lock cmpxchg8b (%edi)
+; X86-BMI2-NEXT: jne .LBB413_3
+; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-BMI2-NEXT: movl $1, %ebp
+; X86-BMI2-NEXT: xorl %ebx, %ebx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx
+; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB413_6
+; X86-BMI2-NEXT: # %bb.5:
+; X86-BMI2-NEXT: movl %ebp, %ebx
+; X86-BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI2-NEXT: .LBB413_6: # %atomicrmw.end
+; X86-BMI2-NEXT: andl %ebx, %edx
+; X86-BMI2-NEXT: andl %ebp, %eax
+; X86-BMI2-NEXT: orl %edx, %eax
+; X86-BMI2-NEXT: je .LBB413_7
+; X86-BMI2-NEXT: # %bb.8: # %if.then
+; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax
+; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx
+; X86-BMI2-NEXT: jmp .LBB413_9
+; X86-BMI2-NEXT: .LBB413_7:
+; X86-BMI2-NEXT: movl $123, %eax
+; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: .LBB413_9: # %return
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_br:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %eax
+; X64-NOBMI2-NEXT: andl $63, %eax
+; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB413_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB413_1:
+; X64-NOBMI2-NEXT: movl $123, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andl $63, %eax
+; X64-BMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-BMI2-NEXT: jae .LBB413_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB413_1:
+; X64-BMI2-NEXT: movl $123, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %shl = shl nuw i64 1, %c
+ %not = xor i64 %shl, -1
+ %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
+ %rem = and i64 %c, 63
+ %shl1 = shl nuw i64 1, %rem
+ %and = and i64 %0, %shl1
+ %tobool.not = icmp eq i64 %and, 0
+ br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c
+ %1 = load i64, ptr %arrayidx, align 8
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+ ret i64 %retval.0
+}
+
+define i64 @atomic_shl1_mask01_and_64_gpr_br(ptr %v, i64 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_br:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: subl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -20
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -16
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl $1, %edi
+; X86-NOBMI2-NEXT: xorl %ebp, %ebp
+; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp
+; X86-NOBMI2-NEXT: shll %cl, %edi
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB414_2
+; X86-NOBMI2-NEXT: # %bb.1:
+; X86-NOBMI2-NEXT: movl %edi, %ebp
+; X86-NOBMI2-NEXT: xorl %edi, %edi
+; X86-NOBMI2-NEXT: .LBB414_2: # %entry
+; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %ebp
+; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %edi
+; X86-NOBMI2-NEXT: movl (%esi), %eax
+; X86-NOBMI2-NEXT: movl 4(%esi), %edx
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB414_3: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ebx
+; X86-NOBMI2-NEXT: andl %edi, %ebx
+; X86-NOBMI2-NEXT: movl %edx, %ecx
+; X86-NOBMI2-NEXT: andl %ebp, %ecx
+; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT: jne .LBB414_3
+; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload
+; X86-NOBMI2-NEXT: orl %eax, %edx
+; X86-NOBMI2-NEXT: je .LBB414_5
+; X86-NOBMI2-NEXT: # %bb.6: # %if.then
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax
+; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx
+; X86-NOBMI2-NEXT: jmp .LBB414_7
+; X86-NOBMI2-NEXT: .LBB414_5:
+; X86-NOBMI2-NEXT: xorl %edx, %edx
+; X86-NOBMI2-NEXT: movl $123, %eax
+; X86-NOBMI2-NEXT: .LBB414_7: # %return
+; X86-NOBMI2-NEXT: addl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: subl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-BMI2-NEXT: .cfi_offset %esi, -20
+; X86-BMI2-NEXT: .cfi_offset %edi, -16
+; X86-BMI2-NEXT: .cfi_offset %ebx, -12
+; X86-BMI2-NEXT: .cfi_offset %ebp, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT: movl $1, %eax
+; X86-BMI2-NEXT: xorl %edi, %edi
+; X86-BMI2-NEXT: shldl %cl, %eax, %edi
+; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB414_2
+; X86-BMI2-NEXT: # %bb.1:
+; X86-BMI2-NEXT: movl %ebp, %edi
+; X86-BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI2-NEXT: .LBB414_2: # %entry
+; X86-BMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-BMI2-NEXT: notl %edi
+; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-BMI2-NEXT: notl %ebp
+; X86-BMI2-NEXT: movl (%esi), %eax
+; X86-BMI2-NEXT: movl 4(%esi), %edx
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB414_3: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %ebx
+; X86-BMI2-NEXT: andl %ebp, %ebx
+; X86-BMI2-NEXT: movl %edx, %ecx
+; X86-BMI2-NEXT: andl %edi, %ecx
+; X86-BMI2-NEXT: lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT: jne .LBB414_3
+; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload
+; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-BMI2-NEXT: orl %eax, %edx
+; X86-BMI2-NEXT: je .LBB414_5
+; X86-BMI2-NEXT: # %bb.6: # %if.then
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax
+; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx
+; X86-BMI2-NEXT: jmp .LBB414_7
+; X86-BMI2-NEXT: .LBB414_5:
+; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: movl $123, %eax
+; X86-BMI2-NEXT: .LBB414_7: # %return
+; X86-BMI2-NEXT: addl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_br:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %eax
+; X64-NOBMI2-NEXT: andl $63, %eax
+; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB414_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB414_1:
+; X64-NOBMI2-NEXT: movl $123, %eax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andl $63, %eax
+; X64-BMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-BMI2-NEXT: jae .LBB414_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB414_1:
+; X64-BMI2-NEXT: movl $123, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %rem = and i64 %c, 63
+ %shl = shl nuw i64 1, %rem
+ %not = xor i64 %shl, -1
+ %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
+ %and = and i64 %0, %shl
+ %tobool.not = icmp eq i64 %and, 0
+ br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c
+ %1 = load i64, ptr %arrayidx, align 8
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+ ret i64 %retval.0
+}
+
+define i64 @atomic_blsi_and_64_gpr_br(ptr %v, i64 %c) {
+; X86-NOBMI2-LABEL: atomic_blsi_and_64_gpr_br:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: subl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -20
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -16
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI2-NEXT: xorl %ebp, %ebp
+; X86-NOBMI2-NEXT: movl %ecx, %esi
+; X86-NOBMI2-NEXT: negl %esi
+; X86-NOBMI2-NEXT: sbbl %eax, %ebp
+; X86-NOBMI2-NEXT: andl %eax, %ebp
+; X86-NOBMI2-NEXT: andl %ecx, %esi
+; X86-NOBMI2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %ebp
+; X86-NOBMI2-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %esi
+; X86-NOBMI2-NEXT: movl (%edi), %eax
+; X86-NOBMI2-NEXT: movl 4(%edi), %edx
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB415_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ebx
+; X86-NOBMI2-NEXT: andl %esi, %ebx
+; X86-NOBMI2-NEXT: movl %edx, %ecx
+; X86-NOBMI2-NEXT: andl %ebp, %ecx
+; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi)
+; X86-NOBMI2-NEXT: jne .LBB415_1
+; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload
+; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NOBMI2-NEXT: orl %eax, %edx
+; X86-NOBMI2-NEXT: je .LBB415_3
+; X86-NOBMI2-NEXT: # %bb.4: # %if.then
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax
+; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %edx
+; X86-NOBMI2-NEXT: jmp .LBB415_5
+; X86-NOBMI2-NEXT: .LBB415_3:
+; X86-NOBMI2-NEXT: xorl %edx, %edx
+; X86-NOBMI2-NEXT: movl $123, %eax
+; X86-NOBMI2-NEXT: .LBB415_5: # %return
+; X86-NOBMI2-NEXT: addl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_blsi_and_64_gpr_br:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: subl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-BMI2-NEXT: .cfi_offset %esi, -20
+; X86-BMI2-NEXT: .cfi_offset %edi, -16
+; X86-BMI2-NEXT: .cfi_offset %ebx, -12
+; X86-BMI2-NEXT: .cfi_offset %ebp, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI2-NEXT: movl %ecx, %esi
+; X86-BMI2-NEXT: negl %esi
+; X86-BMI2-NEXT: sbbl %eax, %ebp
+; X86-BMI2-NEXT: andl %eax, %ebp
+; X86-BMI2-NEXT: andl %ecx, %esi
+; X86-BMI2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-BMI2-NEXT: notl %ebp
+; X86-BMI2-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-BMI2-NEXT: notl %esi
+; X86-BMI2-NEXT: movl (%edi), %eax
+; X86-BMI2-NEXT: movl 4(%edi), %edx
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB415_1: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %ebx
+; X86-BMI2-NEXT: andl %esi, %ebx
+; X86-BMI2-NEXT: movl %edx, %ecx
+; X86-BMI2-NEXT: andl %ebp, %ecx
+; X86-BMI2-NEXT: lock cmpxchg8b (%edi)
+; X86-BMI2-NEXT: jne .LBB415_1
+; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload
+; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-BMI2-NEXT: orl %eax, %edx
+; X86-BMI2-NEXT: je .LBB415_3
+; X86-BMI2-NEXT: # %bb.4: # %if.then
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax
+; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx
+; X86-BMI2-NEXT: jmp .LBB415_5
+; X86-BMI2-NEXT: .LBB415_3:
+; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: movl $123, %eax
+; X86-BMI2-NEXT: .LBB415_5: # %return
+; X86-BMI2-NEXT: addl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-BMI2-LABEL: atomic_blsi_and_64_gpr_br:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: blsiq %rsi, %rcx
+; X64-BMI2-NEXT: movq %rcx, %rdx
+; X64-BMI2-NEXT: notq %rdx
+; X64-BMI2-NEXT: movq (%rdi), %rax
+; X64-BMI2-NEXT: .p2align 4, 0x90
+; X64-BMI2-NEXT: .LBB415_1: # %atomicrmw.start
+; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT: movq %rax, %r8
+; X64-BMI2-NEXT: andq %rdx, %r8
+; X64-BMI2-NEXT: lock cmpxchgq %r8, (%rdi)
+; X64-BMI2-NEXT: jne .LBB415_1
+; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT: testq %rcx, %rax
+; X64-BMI2-NEXT: je .LBB415_3
+; X64-BMI2-NEXT: # %bb.4: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB415_3:
+; X64-BMI2-NEXT: movl $123, %eax
+; X64-BMI2-NEXT: retq
+entry:
+ %sub = sub i64 0, %c
+ %and = and i64 %sub, %c
+ %not = xor i64 %and, -1
+ %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
+ %and3 = and i64 %0, %and
+ %tobool.not = icmp eq i64 %and3, 0
+ br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c
+ %1 = load i64, ptr %arrayidx, align 8
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+ ret i64 %retval.0
+}
+
+define i64 @atomic_shl1_and_64_gpr_brz(ptr %v, i64 %c) {
+; X86-NOBMI2-LABEL: atomic_shl1_and_64_gpr_brz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: subl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -20
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -16
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl $1, %edi
+; X86-NOBMI2-NEXT: xorl %ebp, %ebp
+; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp
+; X86-NOBMI2-NEXT: shll %cl, %edi
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB416_2
+; X86-NOBMI2-NEXT: # %bb.1:
+; X86-NOBMI2-NEXT: movl %edi, %ebp
+; X86-NOBMI2-NEXT: xorl %edi, %edi
+; X86-NOBMI2-NEXT: .LBB416_2: # %entry
+; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %ebp
+; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %edi
+; X86-NOBMI2-NEXT: movl (%esi), %eax
+; X86-NOBMI2-NEXT: movl 4(%esi), %edx
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB416_3: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ebx
+; X86-NOBMI2-NEXT: andl %edi, %ebx
+; X86-NOBMI2-NEXT: movl %edx, %ecx
+; X86-NOBMI2-NEXT: andl %ebp, %ecx
+; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT: jne .LBB416_3
+; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload
+; X86-NOBMI2-NEXT: orl %eax, %edx
+; X86-NOBMI2-NEXT: je .LBB416_6
+; X86-NOBMI2-NEXT: # %bb.5:
+; X86-NOBMI2-NEXT: xorl %edx, %edx
+; X86-NOBMI2-NEXT: movl $123, %eax
+; X86-NOBMI2-NEXT: jmp .LBB416_7
+; X86-NOBMI2-NEXT: .LBB416_6: # %if.then
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax
+; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx
+; X86-NOBMI2-NEXT: .LBB416_7: # %return
+; X86-NOBMI2-NEXT: addl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: popl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: popl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: popl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: popl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT: retl
+;
+; X86-BMI2-LABEL: atomic_shl1_and_64_gpr_brz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: pushl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: pushl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: subl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-BMI2-NEXT: .cfi_offset %esi, -20
+; X86-BMI2-NEXT: .cfi_offset %edi, -16
+; X86-BMI2-NEXT: .cfi_offset %ebx, -12
+; X86-BMI2-NEXT: .cfi_offset %ebp, -8
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT: movl $1, %eax
+; X86-BMI2-NEXT: xorl %edi, %edi
+; X86-BMI2-NEXT: shldl %cl, %eax, %edi
+; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp
+; X86-BMI2-NEXT: testb $32, %cl
+; X86-BMI2-NEXT: je .LBB416_2
+; X86-BMI2-NEXT: # %bb.1:
+; X86-BMI2-NEXT: movl %ebp, %edi
+; X86-BMI2-NEXT: xorl %ebp, %ebp
+; X86-BMI2-NEXT: .LBB416_2: # %entry
+; X86-BMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-BMI2-NEXT: notl %edi
+; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-BMI2-NEXT: notl %ebp
+; X86-BMI2-NEXT: movl (%esi), %eax
+; X86-BMI2-NEXT: movl 4(%esi), %edx
+; X86-BMI2-NEXT: .p2align 4, 0x90
+; X86-BMI2-NEXT: .LBB416_3: # %atomicrmw.start
+; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT: movl %eax, %ebx
+; X86-BMI2-NEXT: andl %ebp, %ebx
+; X86-BMI2-NEXT: movl %edx, %ecx
+; X86-BMI2-NEXT: andl %edi, %ecx
+; X86-BMI2-NEXT: lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT: jne .LBB416_3
+; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end
+; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload
+; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-BMI2-NEXT: orl %eax, %edx
+; X86-BMI2-NEXT: je .LBB416_6
+; X86-BMI2-NEXT: # %bb.5:
+; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: movl $123, %eax
+; X86-BMI2-NEXT: jmp .LBB416_7
+; X86-BMI2-NEXT: .LBB416_6: # %if.then
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax
+; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx
+; X86-BMI2-NEXT: .LBB416_7: # %return
+; X86-BMI2-NEXT: addl $8, %esp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-BMI2-NEXT: popl %esi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT: popl %ebp
+; X86-BMI2-NEXT: .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_and_64_gpr_brz:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movl %esi, %eax
+; X64-NOBMI2-NEXT: andl $63, %eax
+; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-NOBMI2-NEXT: movl $123, %eax
+; X64-NOBMI2-NEXT: jae .LBB416_1
+; X64-NOBMI2-NEXT: # %bb.2: # %return
+; X64-NOBMI2-NEXT: retq
+; X64-NOBMI2-NEXT: .LBB416_1: # %if.then
+; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-NOBMI2-NEXT: retq
+;
+; X64-BMI2-LABEL: atomic_shl1_and_64_gpr_brz:
+; X64-BMI2: # %bb.0: # %entry
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andl $63, %eax
+; X64-BMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-BMI2-NEXT: movl $123, %eax
+; X64-BMI2-NEXT: jae .LBB416_1
+; X64-BMI2-NEXT: # %bb.2: # %return
+; X64-BMI2-NEXT: retq
+; X64-BMI2-NEXT: .LBB416_1: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax
+; X64-BMI2-NEXT: retq
+entry:
+ %shl = shl nuw i64 1, %c
+ %not = xor i64 %shl, -1
+ %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8
+ %and = and i64 %0, %shl
+ %tobool.not = icmp eq i64 %and, 0
+ br i1 %tobool.not, label %if.then, label %return
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c
+ %1 = load i64, ptr %arrayidx, align 8
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+ ret i64 %retval.0
+}
+
+define i64 @atomic_shl2_and_64_gpr_brz(ptr %v, i64 %c) {
+; X86-NOBMI2-LABEL: atomic_shl2_and_64_gpr_brz:
+; X86-NOBMI2: # %bb.0: # %entry
+; X86-NOBMI2-NEXT: pushl %ebp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT: pushl %ebx
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT: pushl %edi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16
+; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20
+; X86-NOBMI2-NEXT: subl $8, %esp
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28
+; X86-NOBMI2-NEXT: .cfi_offset %esi, -20
+; X86-NOBMI2-NEXT: .cfi_offset %edi, -16
+; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12
+; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT: movl $2, %edi
+; X86-NOBMI2-NEXT: xorl %ebp, %ebp
+; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp
+; X86-NOBMI2-NEXT: shll %cl, %edi
+; X86-NOBMI2-NEXT: testb $32, %cl
+; X86-NOBMI2-NEXT: je .LBB417_2
+; X86-NOBMI2-NEXT: # %bb.1:
+; X86-NOBMI2-NEXT: movl %edi, %ebp
+; X86-NOBMI2-NEXT: xorl %edi, %edi
+; X86-NOBMI2-NEXT: .LBB417_2: # %entry
+; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %ebp
+; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOBMI2-NEXT: notl %edi
+; X86-NOBMI2-NEXT: movl (%esi), %eax
+; X86-NOBMI2-NEXT: movl 4(%esi), %edx
+; X86-NOBMI2-NEXT: .p2align 4, 0x90
+; X86-NOBMI2-NEXT: .LBB417_3: # %atomicrmw.start
+; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT: movl %eax, %ebx
+; X86-NOBMI2-NEXT: andl %edi, %ebx
+; X86-NOBMI2-NEXT:
movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB417_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB417_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB417_7 +; X86-NOBMI2-NEXT: .LBB417_6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: .LBB417_7: # %return +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_and_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB417_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB417_2: # %entry +; X86-BMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB417_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB417_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB417_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB417_7 +; X86-BMI2-NEXT: .LBB417_6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: .LBB417_7: # %return +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; 
X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_and_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %esi +; X64-NOBMI2-NEXT: shlq %cl, %rsi +; X64-NOBMI2-NEXT: movq %rsi, %rdx +; X64-NOBMI2-NEXT: notq %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB417_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %r8 +; X64-NOBMI2-NEXT: andq %rdx, %r8 +; X64-NOBMI2-NEXT: lock cmpxchgq %r8, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB417_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %edx +; X64-NOBMI2-NEXT: testq %rsi, %rax +; X64-NOBMI2-NEXT: je .LBB417_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB417_3: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rdx +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_and_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rdx +; X64-BMI2-NEXT: movq %rdx, %rcx +; X64-BMI2-NEXT: notq %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB417_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %r8 +; X64-BMI2-NEXT: andq %rcx, %r8 +; X64-BMI2-NEXT: lock cmpxchgq %r8, (%rdi) +; X64-BMI2-NEXT: jne .LBB417_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: je .LBB417_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB417_3: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rcx +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_and_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB418_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl 
%esi, %esi +; X86-NOBMI2-NEXT: .LBB418_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB418_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB418_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB418_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB418_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: jne .LBB418_8 +; X86-NOBMI2-NEXT: # %bb.7: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-NOBMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-NOBMI2-NEXT: .LBB418_8: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB418_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB418_2: # %entry +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB418_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB418_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: 
movl $1, %ebp +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB418_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB418_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: jne .LBB418_8 +; X86-BMI2-NEXT: # %bb.7: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-BMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-BMI2-NEXT: .LBB418_8: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movq $-2, %rdx +; X64-NOBMI2-NEXT: rolq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB418_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB418_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %esi +; X64-NOBMI2-NEXT: movl $123, %edx +; X64-NOBMI2-NEXT: btq %rsi, %rax +; X64-NOBMI2-NEXT: jae .LBB418_3 +; X64-NOBMI2-NEXT: # %bb.4: # %return +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB418_3: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rdx +; X64-NOBMI2-NEXT: movq %rdx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: movq $-2, %rdx +; X64-BMI2-NEXT: rolq %cl, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB418_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB418_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rcx), %edx +; X64-BMI2-NEXT: movzbl %dl, %esi +; X64-BMI2-NEXT: movl $123, %edx +; X64-BMI2-NEXT: btq %rsi, %rax +; X64-BMI2-NEXT: jae .LBB418_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movq %rdx, %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB418_3: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rcx,8), %rdx +; X64-BMI2-NEXT: movq %rdx, %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi 
i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_small_mask_and_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %eax +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: movl $-2, %ebp +; X86-NOBMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: roll %cl, %ebp +; X86-NOBMI2-NEXT: movl $-1, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB419_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %ebp, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %edi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB419_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NOBMI2-NEXT: je .LBB419_4 +; X86-NOBMI2-NEXT: # %bb.3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB419_5 +; X86-NOBMI2-NEXT: .LBB419_4: # %if.then +; X86-NOBMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: .LBB419_5: # %return +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: movl $-2, %ebp +; X86-BMI2-NEXT: movl %ecx, (%esp) # 4-byte 
Spill +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %ebp +; X86-BMI2-NEXT: movl $-1, %edi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB419_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB419_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-BMI2-NEXT: je .LBB419_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB419_5 +; X86-BMI2-NEXT: .LBB419_4: # %if.then +; X86-BMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: .LBB419_5: # %return +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $31, %esi +; X64-NOBMI2-NEXT: lock btrq %rsi, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB419_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB419_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: lock btrq %rsi, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB419_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB419_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %rem + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask0_and_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; 
X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB420_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB420_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB420_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB420_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB420_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB420_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: jne .LBB420_8 +; X86-NOBMI2-NEXT: # %bb.7: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-NOBMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-NOBMI2-NEXT: .LBB420_8: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB420_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB420_2: # %entry +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB420_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB420_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; 
X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB420_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB420_6: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: jne .LBB420_8 +; X86-BMI2-NEXT: # %bb.7: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-BMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-BMI2-NEXT: .LBB420_8: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB420_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB420_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btrq %rax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB420_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB420_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask1_and_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB421_2 +; X86-NOBMI2-NEXT: # %bb.1: 
+; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB421_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB421_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB421_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB421_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB421_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: movl $123, %ecx +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: jne .LBB421_8 +; X86-NOBMI2-NEXT: # %bb.7: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-NOBMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-NOBMI2-NEXT: .LBB421_8: # %return +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB421_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB421_2: # %entry +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB421_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB421_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, 
%ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB421_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB421_6: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: movl $123, %ecx +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: jne .LBB421_8 +; X86-BMI2-NEXT: # %bb.7: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl (%edi,%eax,8), %ecx +; X86-BMI2-NEXT: movl 4(%edi,%eax,8), %esi +; X86-BMI2-NEXT: .LBB421_8: # %return +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB421_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB421_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btrq %rax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB421_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB421_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask01_and_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB422_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: 
.LBB422_2: # %entry +; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB422_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB422_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB422_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB422_7 +; X86-NOBMI2-NEXT: .LBB422_6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: .LBB422_7: # %return +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB422_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB422_2: # %entry +; X86-BMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB422_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB422_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB422_6 +; X86-BMI2-NEXT: # %bb.5: +; 
X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB422_7 +; X86-BMI2-NEXT: .LBB422_6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: .LBB422_7: # %return +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB422_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB422_1: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btrq %rax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB422_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB422_1: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_blsi_and_64_gpr_brz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_and_64_gpr_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl %ecx, %esi +; X86-NOBMI2-NEXT: negl %esi +; X86-NOBMI2-NEXT: sbbl %eax, %ebp +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB423_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx 
+; X86-NOBMI2-NEXT: andl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB423_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB423_4 +; X86-NOBMI2-NEXT: # %bb.3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB423_5 +; X86-NOBMI2-NEXT: .LBB423_4: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-NOBMI2-NEXT: .LBB423_5: # %return +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_and_64_gpr_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: negl %esi +; X86-BMI2-NEXT: sbbl %eax, %ebp +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB423_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %esi, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %ebp, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB423_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB423_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB423_5 +; X86-BMI2-NEXT: .LBB423_4: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: .LBB423_5: # %return +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: 
.cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_64_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rdx +; X64-BMI2-NEXT: movq %rdx, %rcx +; X64-BMI2-NEXT: notq %rcx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB423_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %r8 +; X64-BMI2-NEXT: andq %rcx, %r8 +; X64-BMI2-NEXT: lock cmpxchgq %r8, (%rdi) +; X64-BMI2-NEXT: jne .LBB423_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testq %rdx, %rax +; X64-BMI2-NEXT: je .LBB423_3 +; X64-BMI2-NEXT: # %bb.4: # %return +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB423_3: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rcx +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %not = xor i64 %and, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and3 = and i64 %0, %and + %tobool.not = icmp eq i64 %and3, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_and_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_and_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB424_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB424_2: # %entry +; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB424_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB424_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB424_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then 
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB424_7 +; X86-NOBMI2-NEXT: .LBB424_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB424_7: # %return +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB424_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB424_2: # %entry +; X86-BMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB424_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB424_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB424_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB424_7 +; X86-BMI2-NEXT: .LBB424_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB424_7: # %return +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB424_1 +; X64-NOBMI2-NEXT: # %bb.2: # 
%if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB424_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btrq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB424_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB424_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl2_and_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl2_and_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $2, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB425_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB425_2: # %entry +; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB425_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB425_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB425_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB425_7 +; X86-NOBMI2-NEXT: .LBB425_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB425_7: # %return +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi 
+; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl2_and_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $2, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB425_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB425_2: # %entry +; X86-BMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB425_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB425_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB425_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB425_7 +; X86-BMI2-NEXT: .LBB425_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB425_7: # %return +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl2_and_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movl $2, %edx +; X64-NOBMI2-NEXT: shlq %cl, %rdx +; X64-NOBMI2-NEXT: movq %rdx, %rsi +; X64-NOBMI2-NEXT: notq %rsi +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB425_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %r8 +; X64-NOBMI2-NEXT: andq %rsi, %r8 +; X64-NOBMI2-NEXT: lock cmpxchgq %r8, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB425_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: testq %rdx, %rax +; X64-NOBMI2-NEXT: je 
.LBB425_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB425_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl2_and_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl $2, %eax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx +; X64-BMI2-NEXT: movq %rcx, %rdx +; X64-BMI2-NEXT: notq %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB425_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %r8 +; X64-BMI2-NEXT: andq %rdx, %r8 +; X64-BMI2-NEXT: lock cmpxchgq %r8, (%rdi) +; X64-BMI2-NEXT: jne .LBB425_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testq %rcx, %rax +; X64-BMI2-NEXT: je .LBB425_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB425_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl i64 2, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_and_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB426_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB426_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB426_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB426_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: incb %cl +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB426_6 +; X86-NOBMI2-NEXT: # 
%bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB426_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB426_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB426_9 +; X86-NOBMI2-NEXT: .LBB426_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB426_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB426_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB426_2: # %entry +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB426_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB426_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx +; X86-BMI2-NEXT: incb %cl +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB426_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB426_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB426_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-BMI2-NEXT: jmp .LBB426_9 +; X86-BMI2-NEXT: .LBB426_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB426_9: # %return +; X86-BMI2-NEXT: movl %esi, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; 
X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: movq $-2, %rdx +; X64-NOBMI2-NEXT: rolq %cl, %rdx +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB426_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rsi +; X64-NOBMI2-NEXT: andq %rdx, %rsi +; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB426_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: leal 1(%rcx), %edx +; X64-NOBMI2-NEXT: movzbl %dl, %edx +; X64-NOBMI2-NEXT: btq %rdx, %rax +; X64-NOBMI2-NEXT: jae .LBB426_3 +; X64-NOBMI2-NEXT: # %bb.4: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB426_3: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq %rsi, %rcx +; X64-BMI2-NEXT: movq $-2, %rdx +; X64-BMI2-NEXT: rolq %cl, %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB426_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rsi +; X64-BMI2-NEXT: andq %rdx, %rsi +; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi) +; X64-BMI2-NEXT: jne .LBB426_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: leal 1(%rcx), %edx +; X64-BMI2-NEXT: movzbl %dl, %edx +; X64-BMI2-NEXT: btq %rdx, %rax +; X64-BMI2-NEXT: jae .LBB426_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rcx,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB426_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %add = add i64 %c, 1 + %shl1 = shl nuw i64 1, %add + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_small_mask_and_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: movl $1, %eax +; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: movl $-2, %ebp +; X86-NOBMI2-NEXT: movl %ecx, (%esp) # 
4-byte Spill +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: roll %cl, %ebp +; X86-NOBMI2-NEXT: movl $-1, %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB427_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %ebp, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %edi, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB427_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NOBMI2-NEXT: je .LBB427_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB427_5 +; X86-NOBMI2-NEXT: .LBB427_3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB427_5: # %return +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: movl $-2, %ebp +; X86-BMI2-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI2-NEXT: roll %cl, %ebp +; X86-BMI2-NEXT: movl $-1, %edi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB427_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB427_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-BMI2-NEXT: je .LBB427_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB427_5 +; X86-BMI2-NEXT: .LBB427_3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB427_5: # %return +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: 
.cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: andl $31, %esi +; X64-NOBMI2-NEXT: lock btrq %rsi, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB427_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB427_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: lock btrq %rsi, (%rdi) +; X64-BMI2-NEXT: jae .LBB427_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB427_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 31 + %shl = shl nuw nsw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %rem + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask0_and_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB428_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB428_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB428_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB428_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je 
.LBB428_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB428_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax +; X86-NOBMI2-NEXT: je .LBB428_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB428_9 +; X86-NOBMI2-NEXT: .LBB428_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB428_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB428_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB428_2: # %entry +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB428_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB428_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB428_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB428_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB428_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB428_9 +; X86-BMI2-NEXT: .LBB428_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB428_9: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: 
atomic_shl1_mask0_and_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB428_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB428_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btrq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB428_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB428_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %shl1 = shl nuw i64 1, %c + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask1_and_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: movl $1, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %esi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %esi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB429_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %esi, %ebp +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: .LBB429_2: # %entry +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: notl %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB429_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB429_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: movl $1, %ebx +; X86-NOBMI2-NEXT: xorl %esi, %esi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: shldl %cl, %ebx, %ebp +; X86-NOBMI2-NEXT: shll %cl, %ebx +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB429_6 +; X86-NOBMI2-NEXT: # %bb.5: +; X86-NOBMI2-NEXT: movl %ebx, %ebp +; X86-NOBMI2-NEXT: xorl %ebx, %ebx +; X86-NOBMI2-NEXT: .LBB429_6: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl %ebp, %edx +; X86-NOBMI2-NEXT: andl %ebx, %eax +; X86-NOBMI2-NEXT: orl %edx, %eax 
+; X86-NOBMI2-NEXT: je .LBB429_7 +; X86-NOBMI2-NEXT: # %bb.8: # %if.then +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %esi +; X86-NOBMI2-NEXT: jmp .LBB429_9 +; X86-NOBMI2-NEXT: .LBB429_7: +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB429_9: # %return +; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %esi, %esi +; X86-BMI2-NEXT: shldl %cl, %eax, %esi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB429_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %esi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB429_2: # %entry +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB429_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %esi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB429_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: movl $1, %ebp +; X86-BMI2-NEXT: xorl %ebx, %ebx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shldl %cl, %ebp, %ebx +; X86-BMI2-NEXT: shlxl %ecx, %ebp, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB429_6 +; X86-BMI2-NEXT: # %bb.5: +; X86-BMI2-NEXT: movl %ebp, %ebx +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB429_6: # %atomicrmw.end +; X86-BMI2-NEXT: andl %ebx, %edx +; X86-BMI2-NEXT: andl %ebp, %eax +; X86-BMI2-NEXT: orl %edx, %eax +; X86-BMI2-NEXT: je .LBB429_7 +; X86-BMI2-NEXT: # %bb.8: # %if.then +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB429_9 +; X86-BMI2-NEXT: .LBB429_7: +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: .LBB429_9: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB429_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; 
X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB429_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: lock btrq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB429_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB429_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %shl = shl nuw i64 1, %c + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %rem = and i64 %c, 63 + %shl1 = shl nuw i64 1, %rem + %and = and i64 %0, %shl1 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_mask01_and_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl $1, %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: shldl %cl, %edi, %ebp +; X86-NOBMI2-NEXT: shll %cl, %edi +; X86-NOBMI2-NEXT: testb $32, %cl +; X86-NOBMI2-NEXT: je .LBB430_2 +; X86-NOBMI2-NEXT: # %bb.1: +; X86-NOBMI2-NEXT: movl %edi, %ebp +; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: .LBB430_2: # %entry +; X86-NOBMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %edi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB430_3: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %edi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB430_3 +; X86-NOBMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl (%esp), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB430_5 +; X86-NOBMI2-NEXT: # %bb.6: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB430_7 +; X86-NOBMI2-NEXT: .LBB430_5: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB430_7: # %return +; X86-NOBMI2-NEXT: addl $8, %esp +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl $1, %eax +; X86-BMI2-NEXT: xorl %edi, %edi +; X86-BMI2-NEXT: shldl %cl, %eax, %edi +; X86-BMI2-NEXT: shlxl %ecx, %eax, %ebp +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB430_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %ebp, %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: .LBB430_2: # %entry +; X86-BMI2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %edi +; X86-BMI2-NEXT: movl %ebp, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB430_3: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %ebp, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB430_3 +; X86-BMI2-NEXT: # %bb.4: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB430_5 +; X86-BMI2-NEXT: # %bb.6: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%esi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%esi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB430_7 +; X86-BMI2-NEXT: .LBB430_5: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB430_7: # %return +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $63, %eax +; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB430_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB430_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $63, %eax +; X64-BMI2-NEXT: 
lock btrq %rax, (%rdi) +; X64-BMI2-NEXT: jae .LBB430_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB430_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %rem = and i64 %c, 63 + %shl = shl nuw i64 1, %rem + %not = xor i64 %shl, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and = and i64 %0, %shl + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_blsi_and_64_gpr_brnz(ptr %v, i64 %c) { +; X86-NOBMI2-LABEL: atomic_blsi_and_64_gpr_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: subl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -20 +; X86-NOBMI2-NEXT: .cfi_offset %edi, -16 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebp, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI2-NEXT: xorl %ebp, %ebp +; X86-NOBMI2-NEXT: movl %ecx, %esi +; X86-NOBMI2-NEXT: negl %esi +; X86-NOBMI2-NEXT: sbbl %eax, %ebp +; X86-NOBMI2-NEXT: andl %eax, %ebp +; X86-NOBMI2-NEXT: andl %ecx, %esi +; X86-NOBMI2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %ebp +; X86-NOBMI2-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-NOBMI2-NEXT: notl %esi +; X86-NOBMI2-NEXT: movl (%edi), %eax +; X86-NOBMI2-NEXT: movl 4(%edi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB431_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl %esi, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: andl %ebp, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%edi) +; X86-NOBMI2-NEXT: jne .LBB431_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-NOBMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NOBMI2-NEXT: orl %eax, %edx +; X86-NOBMI2-NEXT: je .LBB431_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-NOBMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-NOBMI2-NEXT: jmp .LBB431_5 +; X86-NOBMI2-NEXT: .LBB431_3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB431_5: # %return +; X86-NOBMI2-NEXT: addl $8, %esp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebp +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_blsi_and_64_gpr_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebp +; 
X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: subl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 28 +; X86-BMI2-NEXT: .cfi_offset %esi, -20 +; X86-BMI2-NEXT: .cfi_offset %edi, -16 +; X86-BMI2-NEXT: .cfi_offset %ebx, -12 +; X86-BMI2-NEXT: .cfi_offset %ebp, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: xorl %ebp, %ebp +; X86-BMI2-NEXT: movl %ecx, %esi +; X86-BMI2-NEXT: negl %esi +; X86-BMI2-NEXT: sbbl %eax, %ebp +; X86-BMI2-NEXT: andl %eax, %ebp +; X86-BMI2-NEXT: andl %ecx, %esi +; X86-BMI2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI2-NEXT: notl %ebp +; X86-BMI2-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-BMI2-NEXT: notl %esi +; X86-BMI2-NEXT: movl (%edi), %eax +; X86-BMI2-NEXT: movl 4(%edi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB431_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl %esi, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: andl %ebp, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%edi) +; X86-BMI2-NEXT: jne .LBB431_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl (%esp), %eax # 4-byte Folded Reload +; X86-BMI2-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-BMI2-NEXT: orl %eax, %edx +; X86-BMI2-NEXT: je .LBB431_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl (%edi,%ecx,8), %eax +; X86-BMI2-NEXT: movl 4(%edi,%ecx,8), %edx +; X86-BMI2-NEXT: jmp .LBB431_5 +; X86-BMI2-NEXT: .LBB431_3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB431_5: # %return +; X86-BMI2-NEXT: addl $8, %esp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 20 +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 16 +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebp +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-BMI2-LABEL: atomic_blsi_and_64_gpr_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: blsiq %rsi, %rcx +; X64-BMI2-NEXT: movq %rcx, %rdx +; X64-BMI2-NEXT: notq %rdx +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB431_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %r8 +; X64-BMI2-NEXT: andq %rdx, %r8 +; X64-BMI2-NEXT: lock cmpxchgq %r8, (%rdi) +; X64-BMI2-NEXT: jne .LBB431_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: testq %rcx, %rax +; X64-BMI2-NEXT: je .LBB431_3 +; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB431_3: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %sub = sub i64 0, %c + %and = and i64 %sub, %c + %not = xor i64 %and, -1 + %0 = atomicrmw and ptr %v, i64 %not monotonic, align 8 + %and3 = and i64 %0, %and + %tobool.not = icmp eq i64 %and3, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 %c + %1 = load i64, ptr %arrayidx, align 8 + br label 
%return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_xor_64_const_val(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_64_const_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB432_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl $16, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB432_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $16, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_xor_64_const_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB432_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl $16, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB432_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl $16, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_xor_64_const_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcq $4, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: shlq $4, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_xor_64_const_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcq $4, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlq $4, %rax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8 + %and = and i64 %0, 16 + ret i64 %and +} + +define i64 @atomic_shl1_neq_xor_64_const_val(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_64_const_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB433_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl $16, %ebx +; 
X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB433_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $32, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_xor_64_const_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB433_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: xorl $16, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB433_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl $32, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_64_const_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB433_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: xorq $16, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB433_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andl $32, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_xor_64_const_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB433_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: xorq $16, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB433_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl $32, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8 + %and = and i64 %0, 32 + ret i64 %and +} + +define i64 @atomic_shl1_xor_64_const_valz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_xor_64_const_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB434_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: xorl $16, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB434_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testb $16, %al +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; 
X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_xor_64_const_valz:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB434_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    xorl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB434_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:    testb $16, %al
+; X86-BMI2-NEXT:    sete %cl
+; X86-BMI2-NEXT:    movl %ecx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_xor_64_const_valz:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    movq (%rdi), %rax
+; X64-NOBMI2-NEXT:    .p2align 4, 0x90
+; X64-NOBMI2-NEXT:  .LBB434_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT:    movq %rax, %rcx
+; X64-NOBMI2-NEXT:    xorq $16, %rcx
+; X64-NOBMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT:    jne .LBB434_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT:    xorl %ecx, %ecx
+; X64-NOBMI2-NEXT:    testb $16, %al
+; X64-NOBMI2-NEXT:    sete %cl
+; X64-NOBMI2-NEXT:    movq %rcx, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_xor_64_const_valz:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    movq (%rdi), %rax
+; X64-BMI2-NEXT:    .p2align 4, 0x90
+; X64-BMI2-NEXT:  .LBB434_1: # %atomicrmw.start
+; X64-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT:    movq %rax, %rcx
+; X64-BMI2-NEXT:    xorq $16, %rcx
+; X64-BMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT:    jne .LBB434_1
+; X64-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT:    xorl %ecx, %ecx
+; X64-BMI2-NEXT:    testb $16, %al
+; X64-BMI2-NEXT:    sete %cl
+; X64-BMI2-NEXT:    movq %rcx, %rax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8
+  %1 = lshr i64 %0, 4
+  %2 = and i64 %1, 1
+  %lnot.ext = xor i64 %2, 1
+  ret i64 %lnot.ext
+}
+
+define i64 @atomic_shl1_neq_xor_64_const_valz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_64_const_valz:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB435_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    xorl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB435_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    xorl %ecx, %ecx
+; X86-NOBMI2-NEXT:    testb $32, %al
+; X86-NOBMI2-NEXT:    sete %cl
+; X86-NOBMI2-NEXT:    movl %ecx, %eax
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_xor_64_const_valz:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB435_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    xorl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB435_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    sete %cl
+; X86-BMI2-NEXT:    movl %ecx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_64_const_valz:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    movq (%rdi), %rax
+; X64-NOBMI2-NEXT:    .p2align 4, 0x90
+; X64-NOBMI2-NEXT:  .LBB435_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT:    movq %rax, %rcx
+; X64-NOBMI2-NEXT:    xorq $16, %rcx
+; X64-NOBMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT:    jne .LBB435_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT:    xorl %ecx, %ecx
+; X64-NOBMI2-NEXT:    testb $32, %al
+; X64-NOBMI2-NEXT:    sete %cl
+; X64-NOBMI2-NEXT:    movq %rcx, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_xor_64_const_valz:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    movq (%rdi), %rax
+; X64-BMI2-NEXT:    .p2align 4, 0x90
+; X64-BMI2-NEXT:  .LBB435_1: # %atomicrmw.start
+; X64-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT:    movq %rax, %rcx
+; X64-BMI2-NEXT:    xorq $16, %rcx
+; X64-BMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT:    jne .LBB435_1
+; X64-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT:    xorl %ecx, %ecx
+; X64-BMI2-NEXT:    testb $32, %al
+; X64-BMI2-NEXT:    sete %cl
+; X64-BMI2-NEXT:    movq %rcx, %rax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8
+  %1 = lshr i64 %0, 5
+  %2 = and i64 %1, 1
+  %lnot.ext = xor i64 %2, 1
+  ret i64 %lnot.ext
+}
+
+define i64 @atomic_shl1_xor_64_const_valnz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_xor_64_const_valnz:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB436_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    xorl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB436_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    shrl $4, %eax
+; X86-NOBMI2-NEXT:    andl $1, %eax
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_xor_64_const_valnz:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB436_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    xorl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB436_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    shrl $4, %eax
+; X86-BMI2-NEXT:    andl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_xor_64_const_valnz:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    movq (%rdi), %rax
+; X64-NOBMI2-NEXT:    .p2align 4, 0x90
+; X64-NOBMI2-NEXT:  .LBB436_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT:    movq %rax, %rcx
+; X64-NOBMI2-NEXT:    xorq $16, %rcx
+; X64-NOBMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT:    jne .LBB436_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT:    shrl $4, %eax
+; X64-NOBMI2-NEXT:    andl $1, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_xor_64_const_valnz:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    movq (%rdi), %rax
+; X64-BMI2-NEXT:    .p2align 4, 0x90
+; X64-BMI2-NEXT:  .LBB436_1: # %atomicrmw.start
+; X64-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT:    movq %rax, %rcx
+; X64-BMI2-NEXT:    xorq $16, %rcx
+; X64-BMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT:    jne .LBB436_1
+; X64-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT:    shrl $4, %eax
+; X64-BMI2-NEXT:    andl $1, %eax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8
+  %1 = lshr i64 %0, 4
+  %lnot.ext = and i64 %1, 1
+  ret i64 %lnot.ext
+}
+
+define i64 @atomic_shl1_neq_xor_64_const_valnz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_64_const_valnz:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB437_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    xorl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB437_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    shrl $5, %eax
+; X86-NOBMI2-NEXT:    andl $1, %eax
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_xor_64_const_valnz:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB437_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    xorl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB437_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    shrl $5, %eax
+; X86-BMI2-NEXT:    andl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_64_const_valnz:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    movq (%rdi), %rax
+; X64-NOBMI2-NEXT:    .p2align 4, 0x90
+; X64-NOBMI2-NEXT:  .LBB437_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT:    movq %rax, %rcx
+; X64-NOBMI2-NEXT:    xorq $16, %rcx
+; X64-NOBMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT:    jne .LBB437_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT:    shrl $5, %eax
+; X64-NOBMI2-NEXT:    andl $1, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_xor_64_const_valnz:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    movq (%rdi), %rax
+; X64-BMI2-NEXT:    .p2align 4, 0x90
+; X64-BMI2-NEXT:  .LBB437_1: # %atomicrmw.start
+; X64-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT:    movq %rax, %rcx
+; X64-BMI2-NEXT:    xorq $16, %rcx
+; X64-BMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT:    jne .LBB437_1
+; X64-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT:    shrl $5, %eax
+; X64-BMI2-NEXT:    andl $1, %eax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8
+  %1 = lshr i64 %0, 5
+  %lnot.ext = and i64 %1, 1
+  ret i64 %lnot.ext
+}
+
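+; The *_br/_brz/_brnz variants below branch on the tested bit instead of
+; returning it. When the tested bit matches the xor mask (bit 4 of the
+; constant 16), the x86-64 lowering is expected to use `lock btcq $4` and
+; branch on the carry flag; the `_neq_` variants test bit 5 and therefore
+; keep the cmpxchg loop, and i686 always expands 64-bit atomicrmw to a
+; cmpxchg8b loop.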
+define i64 @atomic_shl1_xor_64_const_br(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_xor_64_const_br:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB438_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    xorl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB438_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    testb $16, %al
+; X86-NOBMI2-NEXT:    jne .LBB438_4
+; X86-NOBMI2-NEXT:  # %bb.3:
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    movl $123, %eax
+; X86-NOBMI2-NEXT:    jmp .LBB438_5
+; X86-NOBMI2-NEXT:  .LBB438_4: # %if.then
+; X86-NOBMI2-NEXT:    movl 32(%esi), %eax
+; X86-NOBMI2-NEXT:    movl 36(%esi), %edx
+; X86-NOBMI2-NEXT:  .LBB438_5: # %return
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_xor_64_const_br:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB438_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    xorl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB438_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    testb $16, %al
+; X86-BMI2-NEXT:    jne .LBB438_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl $123, %eax
+; X86-BMI2-NEXT:    jmp .LBB438_5
+; X86-BMI2-NEXT:  .LBB438_4: # %if.then
+; X86-BMI2-NEXT:    movl 32(%esi), %eax
+; X86-BMI2-NEXT:    movl 36(%esi), %edx
+; X86-BMI2-NEXT:  .LBB438_5: # %return
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_xor_64_const_br:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    xorl %eax, %eax
+; X64-NOBMI2-NEXT:    lock btcq $4, (%rdi)
+; X64-NOBMI2-NEXT:    setb %al
+; X64-NOBMI2-NEXT:    shlq $4, %rax
+; X64-NOBMI2-NEXT:    je .LBB438_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %if.then
+; X64-NOBMI2-NEXT:    movq 32(%rdi), %rax
+; X64-NOBMI2-NEXT:    retq
+; X64-NOBMI2-NEXT:  .LBB438_1:
+; X64-NOBMI2-NEXT:    movl $123, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_xor_64_const_br:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    xorl %eax, %eax
+; X64-BMI2-NEXT:    lock btcq $4, (%rdi)
+; X64-BMI2-NEXT:    setb %al
+; X64-BMI2-NEXT:    shlq $4, %rax
+; X64-BMI2-NEXT:    je .LBB438_1
+; X64-BMI2-NEXT:  # %bb.2: # %if.then
+; X64-BMI2-NEXT:    movq 32(%rdi), %rax
+; X64-BMI2-NEXT:    retq
+; X64-BMI2-NEXT:  .LBB438_1:
+; X64-BMI2-NEXT:    movl $123, %eax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8
+  %and = and i64 %0, 16
+  %tobool.not = icmp eq i64 %and, 0
+  br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+  %arrayidx = getelementptr inbounds i64, ptr %v, i64 4
+  %1 = load i64, ptr %arrayidx, align 8
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+  ret i64 %retval.0
+}
+
+define i64 @atomic_shl1_neq_xor_64_const_br(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_64_const_br:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB439_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    xorl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB439_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    testb $32, %al
+; X86-NOBMI2-NEXT:    jne .LBB439_4
+; X86-NOBMI2-NEXT:  # %bb.3:
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    movl $123, %eax
+; X86-NOBMI2-NEXT:    jmp .LBB439_5
+; X86-NOBMI2-NEXT:  .LBB439_4: # %if.then
+; X86-NOBMI2-NEXT:    movl 32(%esi), %eax
+; X86-NOBMI2-NEXT:    movl 36(%esi), %edx
+; X86-NOBMI2-NEXT:  .LBB439_5: # %return
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_xor_64_const_br:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB439_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    xorl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB439_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    jne .LBB439_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl $123, %eax
+; X86-BMI2-NEXT:    jmp .LBB439_5
+; X86-BMI2-NEXT:  .LBB439_4: # %if.then
+; X86-BMI2-NEXT:    movl 32(%esi), %eax
+; X86-BMI2-NEXT:    movl 36(%esi), %edx
+; X86-BMI2-NEXT:  .LBB439_5: # %return
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_64_const_br:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    movq (%rdi), %rax
+; X64-NOBMI2-NEXT:    .p2align 4, 0x90
+; X64-NOBMI2-NEXT:  .LBB439_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT:    movq %rax, %rcx
+; X64-NOBMI2-NEXT:    xorq $16, %rcx
+; X64-NOBMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT:    jne .LBB439_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT:    movl $123, %ecx
+; X64-NOBMI2-NEXT:    testb $32, %al
+; X64-NOBMI2-NEXT:    je .LBB439_4
+; X64-NOBMI2-NEXT:  # %bb.3: # %if.then
+; X64-NOBMI2-NEXT:    movq 32(%rdi), %rcx
+; X64-NOBMI2-NEXT:  .LBB439_4: # %return
+; X64-NOBMI2-NEXT:    movq %rcx, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_xor_64_const_br:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    movq (%rdi), %rax
+; X64-BMI2-NEXT:    .p2align 4, 0x90
+; X64-BMI2-NEXT:  .LBB439_1: # %atomicrmw.start
+; X64-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT:    movq %rax, %rcx
+; X64-BMI2-NEXT:    xorq $16, %rcx
+; X64-BMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT:    jne .LBB439_1
+; X64-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT:    movl $123, %ecx
+; X64-BMI2-NEXT:    testb $32, %al
+; X64-BMI2-NEXT:    je .LBB439_4
+; X64-BMI2-NEXT:  # %bb.3: # %if.then
+; X64-BMI2-NEXT:    movq 32(%rdi), %rcx
+; X64-BMI2-NEXT:  .LBB439_4: # %return
+; X64-BMI2-NEXT:    movq %rcx, %rax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8
+  %and = and i64 %0, 32
+  %tobool.not = icmp eq i64 %and, 0
+  br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+  %arrayidx = getelementptr inbounds i64, ptr %v, i64 4
+  %1 = load i64, ptr %arrayidx, align 8
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+  ret i64 %retval.0
+}
+
+define i64 @atomic_shl1_xor_64_const_brz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_xor_64_const_brz:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB440_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    xorl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB440_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    testb $16, %al
+; X86-NOBMI2-NEXT:    jne .LBB440_3
+; X86-NOBMI2-NEXT:  # %bb.4: # %if.then
+; X86-NOBMI2-NEXT:    movl 32(%esi), %eax
+; X86-NOBMI2-NEXT:    movl 36(%esi), %edx
+; X86-NOBMI2-NEXT:    jmp .LBB440_5
+; X86-NOBMI2-NEXT:  .LBB440_3:
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    movl $123, %eax
+; X86-NOBMI2-NEXT:  .LBB440_5: # %return
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_xor_64_const_brz:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB440_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    xorl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB440_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    testb $16, %al
+; X86-BMI2-NEXT:    jne .LBB440_3
+; X86-BMI2-NEXT:  # %bb.4: # %if.then
+; X86-BMI2-NEXT:    movl 32(%esi), %eax
+; X86-BMI2-NEXT:    movl 36(%esi), %edx
+; X86-BMI2-NEXT:    jmp .LBB440_5
+; X86-BMI2-NEXT:  .LBB440_3:
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl $123, %eax
+; X86-BMI2-NEXT:  .LBB440_5: # %return
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_xor_64_const_brz:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    xorl %eax, %eax
+; X64-NOBMI2-NEXT:    lock btcq $4, (%rdi)
+; X64-NOBMI2-NEXT:    setb %al
+; X64-NOBMI2-NEXT:    shlq $4, %rax
+; X64-NOBMI2-NEXT:    movl $123, %eax
+; X64-NOBMI2-NEXT:    je .LBB440_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %return
+; X64-NOBMI2-NEXT:    retq
+; X64-NOBMI2-NEXT:  .LBB440_1: # %if.then
+; X64-NOBMI2-NEXT:    movq 32(%rdi), %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_xor_64_const_brz:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    xorl %eax, %eax
+; X64-BMI2-NEXT:    lock btcq $4, (%rdi)
+; X64-BMI2-NEXT:    setb %al
+; X64-BMI2-NEXT:    shlq $4, %rax
+; X64-BMI2-NEXT:    movl $123, %eax
+; X64-BMI2-NEXT:    je .LBB440_1
+; X64-BMI2-NEXT:  # %bb.2: # %return
+; X64-BMI2-NEXT:    retq
+; X64-BMI2-NEXT:  .LBB440_1: # %if.then
+; X64-BMI2-NEXT:    movq 32(%rdi), %rax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8
+  %and = and i64 %0, 16
+  %tobool.not = icmp eq i64 %and, 0
+  br i1 %tobool.not, label %if.then, label %return
+
+if.then: ; preds = %entry
+  %arrayidx = getelementptr inbounds i64, ptr %v, i64 4
+  %1 = load i64, ptr %arrayidx, align 8
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+  ret i64 %retval.0
+}
+
+define i64 @atomic_shl1_neq_xor_64_const_brz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_64_const_brz:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB441_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    xorl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB441_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    testb $32, %al
+; X86-NOBMI2-NEXT:    jne .LBB441_3
+; X86-NOBMI2-NEXT:  # %bb.4: # %if.then
+; X86-NOBMI2-NEXT:    movl 32(%esi), %eax
+; X86-NOBMI2-NEXT:    movl 36(%esi), %edx
+; X86-NOBMI2-NEXT:    jmp .LBB441_5
+; X86-NOBMI2-NEXT:  .LBB441_3:
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    movl $123, %eax
+; X86-NOBMI2-NEXT:  .LBB441_5: # %return
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_xor_64_const_brz:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB441_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    xorl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB441_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    jne .LBB441_3
+; X86-BMI2-NEXT:  # %bb.4: # %if.then
+; X86-BMI2-NEXT:    movl 32(%esi), %eax
+; X86-BMI2-NEXT:    movl 36(%esi), %edx
+; X86-BMI2-NEXT:    jmp .LBB441_5
+; X86-BMI2-NEXT:  .LBB441_3:
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl $123, %eax
+; X86-BMI2-NEXT:  .LBB441_5: # %return
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_64_const_brz:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    movq (%rdi), %rax
+; X64-NOBMI2-NEXT:    .p2align 4, 0x90
+; X64-NOBMI2-NEXT:  .LBB441_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT:    movq %rax, %rcx
+; X64-NOBMI2-NEXT:    xorq $16, %rcx
+; X64-NOBMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT:    jne .LBB441_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT:    movl $123, %ecx
+; X64-NOBMI2-NEXT:    testb $32, %al
+; X64-NOBMI2-NEXT:    jne .LBB441_4
+; X64-NOBMI2-NEXT:  # %bb.3: # %if.then
+; X64-NOBMI2-NEXT:    movq 32(%rdi), %rcx
+; X64-NOBMI2-NEXT:  .LBB441_4: # %return
+; X64-NOBMI2-NEXT:    movq %rcx, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_xor_64_const_brz:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    movq (%rdi), %rax
+; X64-BMI2-NEXT:    .p2align 4, 0x90
+; X64-BMI2-NEXT:  .LBB441_1: # %atomicrmw.start
+; X64-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT:    movq %rax, %rcx
+; X64-BMI2-NEXT:    xorq $16, %rcx
+; X64-BMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT:    jne .LBB441_1
+; X64-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT:    movl $123, %ecx
+; X64-BMI2-NEXT:    testb $32, %al
+; X64-BMI2-NEXT:    jne .LBB441_4
+; X64-BMI2-NEXT:  # %bb.3: # %if.then
+; X64-BMI2-NEXT:    movq 32(%rdi), %rcx
+; X64-BMI2-NEXT:  .LBB441_4: # %return
+; X64-BMI2-NEXT:    movq %rcx, %rax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8
+  %and = and i64 %0, 32
+  %tobool.not = icmp eq i64 %and, 0
+  br i1 %tobool.not, label %if.then, label %return
+
+if.then: ; preds = %entry
+  %arrayidx = getelementptr inbounds i64, ptr %v, i64 4
+  %1 = load i64, ptr %arrayidx, align 8
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+  ret i64 %retval.0
+}
+
+define i64 @atomic_shl1_xor_64_const_brnz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_xor_64_const_brnz:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB442_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    xorl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB442_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    testb $16, %al
+; X86-NOBMI2-NEXT:    jne .LBB442_4
+; X86-NOBMI2-NEXT:  # %bb.3:
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    movl $123, %eax
+; X86-NOBMI2-NEXT:    jmp .LBB442_5
+; X86-NOBMI2-NEXT:  .LBB442_4: # %if.then
+; X86-NOBMI2-NEXT:    movl 32(%esi), %eax
+; X86-NOBMI2-NEXT:    movl 36(%esi), %edx
+; X86-NOBMI2-NEXT:  .LBB442_5: # %return
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_xor_64_const_brnz:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB442_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    xorl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB442_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    testb $16, %al
+; X86-BMI2-NEXT:    jne .LBB442_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl $123, %eax
+; X86-BMI2-NEXT:    jmp .LBB442_5
+; X86-BMI2-NEXT:  .LBB442_4: # %if.then
+; X86-BMI2-NEXT:    movl 32(%esi), %eax
+; X86-BMI2-NEXT:    movl 36(%esi), %edx
+; X86-BMI2-NEXT:  .LBB442_5: # %return
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_xor_64_const_brnz:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    xorl %eax, %eax
+; X64-NOBMI2-NEXT:    lock btcq $4, (%rdi)
+; X64-NOBMI2-NEXT:    setb %al
+; X64-NOBMI2-NEXT:    shlq $4, %rax
+; X64-NOBMI2-NEXT:    je .LBB442_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %if.then
+; X64-NOBMI2-NEXT:    movq 32(%rdi), %rax
+; X64-NOBMI2-NEXT:    retq
+; X64-NOBMI2-NEXT:  .LBB442_1:
+; X64-NOBMI2-NEXT:    movl $123, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_xor_64_const_brnz:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    xorl %eax, %eax
+; X64-BMI2-NEXT:    lock btcq $4, (%rdi)
+; X64-BMI2-NEXT:    setb %al
+; X64-BMI2-NEXT:    shlq $4, %rax
+; X64-BMI2-NEXT:    je .LBB442_1
+; X64-BMI2-NEXT:  # %bb.2: # %if.then
+; X64-BMI2-NEXT:    movq 32(%rdi), %rax
+; X64-BMI2-NEXT:    retq
+; X64-BMI2-NEXT:  .LBB442_1:
+; X64-BMI2-NEXT:    movl $123, %eax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8
+  %and = and i64 %0, 16
+  %tobool.not = icmp eq i64 %and, 0
+  br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+  %arrayidx = getelementptr inbounds i64, ptr %v, i64 4
+  %1 = load i64, ptr %arrayidx, align 8
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+  ret i64 %retval.0
+}
+
+define i64 @atomic_shl1_neq_xor_64_const_brnz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_xor_64_const_brnz:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB443_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    xorl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB443_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    testb $32, %al
+; X86-NOBMI2-NEXT:    jne .LBB443_4
+; X86-NOBMI2-NEXT:  # %bb.3:
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    movl $123, %eax
+; X86-NOBMI2-NEXT:    jmp .LBB443_5
+; X86-NOBMI2-NEXT:  .LBB443_4: # %if.then
+; X86-NOBMI2-NEXT:    movl 32(%esi), %eax
+; X86-NOBMI2-NEXT:    movl 36(%esi), %edx
+; X86-NOBMI2-NEXT:  .LBB443_5: # %return
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_xor_64_const_brnz:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB443_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    xorl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB443_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    jne .LBB443_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl $123, %eax
+; X86-BMI2-NEXT:    jmp .LBB443_5
+; X86-BMI2-NEXT:  .LBB443_4: # %if.then
+; X86-BMI2-NEXT:    movl 32(%esi), %eax
+; X86-BMI2-NEXT:    movl 36(%esi), %edx
+; X86-BMI2-NEXT:  .LBB443_5: # %return
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_xor_64_const_brnz:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    movq (%rdi), %rax
+; X64-NOBMI2-NEXT:    .p2align 4, 0x90
+; X64-NOBMI2-NEXT:  .LBB443_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT:    movq %rax, %rcx
+; X64-NOBMI2-NEXT:    xorq $16, %rcx
+; X64-NOBMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT:    jne .LBB443_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT:    movl $123, %ecx
+; X64-NOBMI2-NEXT:    testb $32, %al
+; X64-NOBMI2-NEXT:    je .LBB443_4
+; X64-NOBMI2-NEXT:  # %bb.3: # %if.then
+; X64-NOBMI2-NEXT:    movq 32(%rdi), %rcx
+; X64-NOBMI2-NEXT:  .LBB443_4: # %return
+; X64-NOBMI2-NEXT:    movq %rcx, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_xor_64_const_brnz:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    movq (%rdi), %rax
+; X64-BMI2-NEXT:    .p2align 4, 0x90
+; X64-BMI2-NEXT:  .LBB443_1: # %atomicrmw.start
+; X64-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT:    movq %rax, %rcx
+; X64-BMI2-NEXT:    xorq $16, %rcx
+; X64-BMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT:    jne .LBB443_1
+; X64-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT:    movl $123, %ecx
+; X64-BMI2-NEXT:    testb $32, %al
+; X64-BMI2-NEXT:    je .LBB443_4
+; X64-BMI2-NEXT:  # %bb.3: # %if.then
+; X64-BMI2-NEXT:    movq 32(%rdi), %rcx
+; X64-BMI2-NEXT:  .LBB443_4: # %return
+; X64-BMI2-NEXT:    movq %rcx, %rax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw xor ptr %v, i64 16 monotonic, align 8
+  %and = and i64 %0, 32
+  %tobool.not = icmp eq i64 %and, 0
+  br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+  %arrayidx = getelementptr inbounds i64, ptr %v, i64 4
+  %1 = load i64, ptr %arrayidx, align 8
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+  ret i64 %retval.0
+}
+
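+; The tests below repeat the same patterns with `atomicrmw or`. A matching
+; single-bit mask should select `lock btsq` (bit test and set) rather than
+; `lock btcq`; as above, the `_neq_` variants and the i686 targets are
+; expected to fall back to the cmpxchg loop.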
+define i64 @atomic_shl1_or_64_const_val(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_or_64_const_val:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB444_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    orl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB444_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    andl $16, %eax
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_or_64_const_val:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB444_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    orl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB444_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    andl $16, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_or_64_const_val:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    xorl %eax, %eax
+; X64-NOBMI2-NEXT:    lock btsq $4, (%rdi)
+; X64-NOBMI2-NEXT:    setb %al
+; X64-NOBMI2-NEXT:    shlq $4, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_or_64_const_val:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    xorl %eax, %eax
+; X64-BMI2-NEXT:    lock btsq $4, (%rdi)
+; X64-BMI2-NEXT:    setb %al
+; X64-BMI2-NEXT:    shlq $4, %rax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw or ptr %v, i64 16 monotonic, align 8
+  %and = and i64 %0, 16
+  ret i64 %and
+}
+
+define i64 @atomic_shl1_neq_or_64_const_val(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_or_64_const_val:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB445_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    orl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB445_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    andl $32, %eax
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_or_64_const_val:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB445_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    orl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB445_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    andl $32, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_or_64_const_val:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    movq (%rdi), %rax
+; X64-NOBMI2-NEXT:    .p2align 4, 0x90
+; X64-NOBMI2-NEXT:  .LBB445_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT:    movq %rax, %rcx
+; X64-NOBMI2-NEXT:    orq $16, %rcx
+; X64-NOBMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT:    jne .LBB445_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT:    andl $32, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_or_64_const_val:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    movq (%rdi), %rax
+; X64-BMI2-NEXT:    .p2align 4, 0x90
+; X64-BMI2-NEXT:  .LBB445_1: # %atomicrmw.start
+; X64-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT:    movq %rax, %rcx
+; X64-BMI2-NEXT:    orq $16, %rcx
+; X64-BMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT:    jne .LBB445_1
+; X64-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT:    andl $32, %eax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw or ptr %v, i64 16 monotonic, align 8
+  %and = and i64 %0, 32
+  ret i64 %and
+}
+
+define i64 @atomic_shl1_or_64_const_valz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_or_64_const_valz:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB446_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    orl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB446_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    xorl %ecx, %ecx
+; X86-NOBMI2-NEXT:    testb $16, %al
+; X86-NOBMI2-NEXT:    sete %cl
+; X86-NOBMI2-NEXT:    movl %ecx, %eax
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_or_64_const_valz:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB446_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    orl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB446_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:    testb $16, %al
+; X86-BMI2-NEXT:    sete %cl
+; X86-BMI2-NEXT:    movl %ecx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_or_64_const_valz:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    movq (%rdi), %rax
+; X64-NOBMI2-NEXT:    .p2align 4, 0x90
+; X64-NOBMI2-NEXT:  .LBB446_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT:    movq %rax, %rcx
+; X64-NOBMI2-NEXT:    orq $16, %rcx
+; X64-NOBMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT:    jne .LBB446_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT:    xorl %ecx, %ecx
+; X64-NOBMI2-NEXT:    testb $16, %al
+; X64-NOBMI2-NEXT:    sete %cl
+; X64-NOBMI2-NEXT:    movq %rcx, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_or_64_const_valz:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    movq (%rdi), %rax
+; X64-BMI2-NEXT:    .p2align 4, 0x90
+; X64-BMI2-NEXT:  .LBB446_1: # %atomicrmw.start
+; X64-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT:    movq %rax, %rcx
+; X64-BMI2-NEXT:    orq $16, %rcx
+; X64-BMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT:    jne .LBB446_1
+; X64-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT:    xorl %ecx, %ecx
+; X64-BMI2-NEXT:    testb $16, %al
+; X64-BMI2-NEXT:    sete %cl
+; X64-BMI2-NEXT:    movq %rcx, %rax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw or ptr %v, i64 16 monotonic, align 8
+  %1 = lshr i64 %0, 4
+  %2 = and i64 %1, 1
+  %lnot.ext = xor i64 %2, 1
+  ret i64 %lnot.ext
+}
+
+define i64 @atomic_shl1_neq_or_64_const_valz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_or_64_const_valz:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB447_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    orl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB447_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    xorl %ecx, %ecx
+; X86-NOBMI2-NEXT:    testb $32, %al
+; X86-NOBMI2-NEXT:    sete %cl
+; X86-NOBMI2-NEXT:    movl %ecx, %eax
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_or_64_const_valz:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB447_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    orl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB447_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    sete %cl
+; X86-BMI2-NEXT:    movl %ecx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_or_64_const_valz:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    movq (%rdi), %rax
+; X64-NOBMI2-NEXT:    .p2align 4, 0x90
+; X64-NOBMI2-NEXT:  .LBB447_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT:    movq %rax, %rcx
+; X64-NOBMI2-NEXT:    orq $16, %rcx
+; X64-NOBMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT:    jne .LBB447_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT:    xorl %ecx, %ecx
+; X64-NOBMI2-NEXT:    testb $32, %al
+; X64-NOBMI2-NEXT:    sete %cl
+; X64-NOBMI2-NEXT:    movq %rcx, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_or_64_const_valz:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    movq (%rdi), %rax
+; X64-BMI2-NEXT:    .p2align 4, 0x90
+; X64-BMI2-NEXT:  .LBB447_1: # %atomicrmw.start
+; X64-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT:    movq %rax, %rcx
+; X64-BMI2-NEXT:    orq $16, %rcx
+; X64-BMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT:    jne .LBB447_1
+; X64-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT:    xorl %ecx, %ecx
+; X64-BMI2-NEXT:    testb $32, %al
+; X64-BMI2-NEXT:    sete %cl
+; X64-BMI2-NEXT:    movq %rcx, %rax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw or ptr %v, i64 16 monotonic, align 8
+  %1 = lshr i64 %0, 5
+  %2 = and i64 %1, 1
+  %lnot.ext = xor i64 %2, 1
+  ret i64 %lnot.ext
+}
+
+define i64 @atomic_shl1_or_64_const_valnz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_or_64_const_valnz:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB448_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    orl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB448_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    shrl $4, %eax
+; X86-NOBMI2-NEXT:    andl $1, %eax
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_or_64_const_valnz:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB448_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    orl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB448_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    shrl $4, %eax
+; X86-BMI2-NEXT:    andl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_or_64_const_valnz:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    movq (%rdi), %rax
+; X64-NOBMI2-NEXT:    .p2align 4, 0x90
+; X64-NOBMI2-NEXT:  .LBB448_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT:    movq %rax, %rcx
+; X64-NOBMI2-NEXT:    orq $16, %rcx
+; X64-NOBMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT:    jne .LBB448_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT:    shrl $4, %eax
+; X64-NOBMI2-NEXT:    andl $1, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_or_64_const_valnz:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    movq (%rdi), %rax
+; X64-BMI2-NEXT:    .p2align 4, 0x90
+; X64-BMI2-NEXT:  .LBB448_1: # %atomicrmw.start
+; X64-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT:    movq %rax, %rcx
+; X64-BMI2-NEXT:    orq $16, %rcx
+; X64-BMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT:    jne .LBB448_1
+; X64-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT:    shrl $4, %eax
+; X64-BMI2-NEXT:    andl $1, %eax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw or ptr %v, i64 16 monotonic, align 8
+  %1 = lshr i64 %0, 4
+  %lnot.ext = and i64 %1, 1
+  ret i64 %lnot.ext
+}
+
+define i64 @atomic_shl1_neq_or_64_const_valnz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_or_64_const_valnz:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB449_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    orl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB449_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    shrl $5, %eax
+; X86-NOBMI2-NEXT:    andl $1, %eax
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_or_64_const_valnz:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB449_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    orl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB449_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    shrl $5, %eax
+; X86-BMI2-NEXT:    andl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_or_64_const_valnz:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    movq (%rdi), %rax
+; X64-NOBMI2-NEXT:    .p2align 4, 0x90
+; X64-NOBMI2-NEXT:  .LBB449_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT:    movq %rax, %rcx
+; X64-NOBMI2-NEXT:    orq $16, %rcx
+; X64-NOBMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT:    jne .LBB449_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT:    shrl $5, %eax
+; X64-NOBMI2-NEXT:    andl $1, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_or_64_const_valnz:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    movq (%rdi), %rax
+; X64-BMI2-NEXT:    .p2align 4, 0x90
+; X64-BMI2-NEXT:  .LBB449_1: # %atomicrmw.start
+; X64-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT:    movq %rax, %rcx
+; X64-BMI2-NEXT:    orq $16, %rcx
+; X64-BMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT:    jne .LBB449_1
+; X64-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT:    shrl $5, %eax
+; X64-BMI2-NEXT:    andl $1, %eax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw or ptr %v, i64 16 monotonic, align 8
+  %1 = lshr i64 %0, 5
+  %lnot.ext = and i64 %1, 1
+  ret i64 %lnot.ext
+}
+
+define i64 @atomic_shl1_or_64_const_br(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_or_64_const_br:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB450_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    orl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB450_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    testb $16, %al
+; X86-NOBMI2-NEXT:    jne .LBB450_4
+; X86-NOBMI2-NEXT:  # %bb.3:
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    movl $123, %eax
+; X86-NOBMI2-NEXT:    jmp .LBB450_5
+; X86-NOBMI2-NEXT:  .LBB450_4: # %if.then
+; X86-NOBMI2-NEXT:    movl 32(%esi), %eax
+; X86-NOBMI2-NEXT:    movl 36(%esi), %edx
+; X86-NOBMI2-NEXT:  .LBB450_5: # %return
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_or_64_const_br:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB450_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    orl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB450_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    testb $16, %al
+; X86-BMI2-NEXT:    jne .LBB450_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl $123, %eax
+; X86-BMI2-NEXT:    jmp .LBB450_5
+; X86-BMI2-NEXT:  .LBB450_4: # %if.then
+; X86-BMI2-NEXT:    movl 32(%esi), %eax
+; X86-BMI2-NEXT:    movl 36(%esi), %edx
+; X86-BMI2-NEXT:  .LBB450_5: # %return
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_or_64_const_br:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    xorl %eax, %eax
+; X64-NOBMI2-NEXT:    lock btsq $4, (%rdi)
+; X64-NOBMI2-NEXT:    setb %al
+; X64-NOBMI2-NEXT:    shlq $4, %rax
+; X64-NOBMI2-NEXT:    je .LBB450_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %if.then
+; X64-NOBMI2-NEXT:    movq 32(%rdi), %rax
+; X64-NOBMI2-NEXT:    retq
+; X64-NOBMI2-NEXT:  .LBB450_1:
+; X64-NOBMI2-NEXT:    movl $123, %eax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_or_64_const_br:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    xorl %eax, %eax
+; X64-BMI2-NEXT:    lock btsq $4, (%rdi)
+; X64-BMI2-NEXT:    setb %al
+; X64-BMI2-NEXT:    shlq $4, %rax
+; X64-BMI2-NEXT:    je .LBB450_1
+; X64-BMI2-NEXT:  # %bb.2: # %if.then
+; X64-BMI2-NEXT:    movq 32(%rdi), %rax
+; X64-BMI2-NEXT:    retq
+; X64-BMI2-NEXT:  .LBB450_1:
+; X64-BMI2-NEXT:    movl $123, %eax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw or ptr %v, i64 16 monotonic, align 8
+  %and = and i64 %0, 16
+  %tobool.not = icmp eq i64 %and, 0
+  br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+  %arrayidx = getelementptr inbounds i64, ptr %v, i64 4
+  %1 = load i64, ptr %arrayidx, align 8
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+  ret i64 %retval.0
+}
+
+define i64 @atomic_shl1_neq_or_64_const_br(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_neq_or_64_const_br:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB451_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    orl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB451_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    testb $32, %al
+; X86-NOBMI2-NEXT:    jne .LBB451_4
+; X86-NOBMI2-NEXT:  # %bb.3:
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    movl $123, %eax
+; X86-NOBMI2-NEXT:    jmp .LBB451_5
+; X86-NOBMI2-NEXT:  .LBB451_4: # %if.then
+; X86-NOBMI2-NEXT:    movl 32(%esi), %eax
+; X86-NOBMI2-NEXT:    movl 36(%esi), %edx
+; X86-NOBMI2-NEXT:  .LBB451_5: # %return
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_neq_or_64_const_br:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB451_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    orl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB451_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    jne .LBB451_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl $123, %eax
+; X86-BMI2-NEXT:    jmp .LBB451_5
+; X86-BMI2-NEXT:  .LBB451_4: # %if.then
+; X86-BMI2-NEXT:    movl 32(%esi), %eax
+; X86-BMI2-NEXT:    movl 36(%esi), %edx
+; X86-BMI2-NEXT:  .LBB451_5: # %return
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_neq_or_64_const_br:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    movq (%rdi), %rax
+; X64-NOBMI2-NEXT:    .p2align 4, 0x90
+; X64-NOBMI2-NEXT:  .LBB451_1: # %atomicrmw.start
+; X64-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NOBMI2-NEXT:    movq %rax, %rcx
+; X64-NOBMI2-NEXT:    orq $16, %rcx
+; X64-NOBMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-NOBMI2-NEXT:    jne .LBB451_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-NOBMI2-NEXT:    movl $123, %ecx
+; X64-NOBMI2-NEXT:    testb $32, %al
+; X64-NOBMI2-NEXT:    je .LBB451_4
+; X64-NOBMI2-NEXT:  # %bb.3: # %if.then
+; X64-NOBMI2-NEXT:    movq 32(%rdi), %rcx
+; X64-NOBMI2-NEXT:  .LBB451_4: # %return
+; X64-NOBMI2-NEXT:    movq %rcx, %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_neq_or_64_const_br:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    movq (%rdi), %rax
+; X64-BMI2-NEXT:    .p2align 4, 0x90
+; X64-BMI2-NEXT:  .LBB451_1: # %atomicrmw.start
+; X64-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-BMI2-NEXT:    movq %rax, %rcx
+; X64-BMI2-NEXT:    orq $16, %rcx
+; X64-BMI2-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; X64-BMI2-NEXT:    jne .LBB451_1
+; X64-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X64-BMI2-NEXT:    movl $123, %ecx
+; X64-BMI2-NEXT:    testb $32, %al
+; X64-BMI2-NEXT:    je .LBB451_4
+; X64-BMI2-NEXT:  # %bb.3: # %if.then
+; X64-BMI2-NEXT:    movq 32(%rdi), %rcx
+; X64-BMI2-NEXT:  .LBB451_4: # %return
+; X64-BMI2-NEXT:    movq %rcx, %rax
+; X64-BMI2-NEXT:    retq
+entry:
+  %0 = atomicrmw or ptr %v, i64 16 monotonic, align 8
+  %and = and i64 %0, 32
+  %tobool.not = icmp eq i64 %and, 0
+  br i1 %tobool.not, label %return, label %if.then
+
+if.then: ; preds = %entry
+  %arrayidx = getelementptr inbounds i64, ptr %v, i64 4
+  %1 = load i64, ptr %arrayidx, align 8
+  br label %return
+
+return: ; preds = %entry, %if.then
+  %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ]
+  ret i64 %retval.0
+}
+
+define i64 @atomic_shl1_or_64_const_brz(ptr %v) {
+; X86-NOBMI2-LABEL: atomic_shl1_or_64_const_brz:
+; X86-NOBMI2:       # %bb.0: # %entry
+; X86-NOBMI2-NEXT:    pushl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    pushl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-NOBMI2-NEXT:    .cfi_offset %esi, -12
+; X86-NOBMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl (%esi), %eax
+; X86-NOBMI2-NEXT:    movl 4(%esi), %edx
+; X86-NOBMI2-NEXT:    .p2align 4, 0x90
+; X86-NOBMI2-NEXT:  .LBB452_1: # %atomicrmw.start
+; X86-NOBMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NOBMI2-NEXT:    movl %eax, %ebx
+; X86-NOBMI2-NEXT:    orl $16, %ebx
+; X86-NOBMI2-NEXT:    movl %edx, %ecx
+; X86-NOBMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-NOBMI2-NEXT:    jne .LBB452_1
+; X86-NOBMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-NOBMI2-NEXT:    testb $16, %al
+; X86-NOBMI2-NEXT:    jne .LBB452_3
+; X86-NOBMI2-NEXT:  # %bb.4: # %if.then
+; X86-NOBMI2-NEXT:    movl 32(%esi), %eax
+; X86-NOBMI2-NEXT:    movl 36(%esi), %edx
+; X86-NOBMI2-NEXT:    jmp .LBB452_5
+; X86-NOBMI2-NEXT:  .LBB452_3:
+; X86-NOBMI2-NEXT:    xorl %edx, %edx
+; X86-NOBMI2-NEXT:    movl $123, %eax
+; X86-NOBMI2-NEXT:  .LBB452_5: # %return
+; X86-NOBMI2-NEXT:    popl %esi
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-NOBMI2-NEXT:    popl %ebx
+; X86-NOBMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-NOBMI2-NEXT:    retl
+;
+; X86-BMI2-LABEL: atomic_shl1_or_64_const_brz:
+; X86-BMI2:       # %bb.0: # %entry
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 12
+; X86-BMI2-NEXT:    .cfi_offset %esi, -12
+; X86-BMI2-NEXT:    .cfi_offset %ebx, -8
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl (%esi), %eax
+; X86-BMI2-NEXT:    movl 4(%esi), %edx
+; X86-BMI2-NEXT:    .p2align 4, 0x90
+; X86-BMI2-NEXT:  .LBB452_1: # %atomicrmw.start
+; X86-BMI2-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-BMI2-NEXT:    movl %eax, %ebx
+; X86-BMI2-NEXT:    orl $16, %ebx
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    lock cmpxchg8b (%esi)
+; X86-BMI2-NEXT:    jne .LBB452_1
+; X86-BMI2-NEXT:  # %bb.2: # %atomicrmw.end
+; X86-BMI2-NEXT:    testb $16, %al
+; X86-BMI2-NEXT:    jne .LBB452_3
+; X86-BMI2-NEXT:  # %bb.4: # %if.then
+; X86-BMI2-NEXT:    movl 32(%esi), %eax
+; X86-BMI2-NEXT:    movl 36(%esi), %edx
+; X86-BMI2-NEXT:    jmp .LBB452_5
+; X86-BMI2-NEXT:  .LBB452_3:
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl $123, %eax
+; X86-BMI2-NEXT:  .LBB452_5: # %return
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 8
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    .cfi_def_cfa_offset 4
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_or_64_const_brz:
+; X64-NOBMI2:       # %bb.0: # %entry
+; X64-NOBMI2-NEXT:    xorl %eax, %eax
+; X64-NOBMI2-NEXT:    lock btsq $4, (%rdi)
+; X64-NOBMI2-NEXT:    setb %al
+; X64-NOBMI2-NEXT:    shlq $4, %rax
+; X64-NOBMI2-NEXT:    movl $123, %eax
+; X64-NOBMI2-NEXT:    je .LBB452_1
+; X64-NOBMI2-NEXT:  # %bb.2: # %return
+; X64-NOBMI2-NEXT:    retq
+; X64-NOBMI2-NEXT:  .LBB452_1: # %if.then
+; X64-NOBMI2-NEXT:    movq 32(%rdi), %rax
+; X64-NOBMI2-NEXT:    retq
+;
+; X64-BMI2-LABEL: atomic_shl1_or_64_const_brz:
+; X64-BMI2:       # %bb.0: # %entry
+; X64-BMI2-NEXT:    xorl
%eax, %eax +; X64-BMI2-NEXT: lock btsq $4, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlq $4, %rax +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: je .LBB452_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB452_1: # %if.then +; X64-BMI2-NEXT: movq 32(%rdi), %rax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i64 16 monotonic, align 8 + %and = and i64 %0, 16 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_or_64_const_brz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_64_const_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB453_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl $16, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB453_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: jne .LBB453_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl 32(%esi), %eax +; X86-NOBMI2-NEXT: movl 36(%esi), %edx +; X86-NOBMI2-NEXT: jmp .LBB453_5 +; X86-NOBMI2-NEXT: .LBB453_3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB453_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_64_const_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB453_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl $16, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB453_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB453_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl 32(%esi), %eax +; X86-BMI2-NEXT: movl 36(%esi), %edx +; X86-BMI2-NEXT: jmp .LBB453_5 +; X86-BMI2-NEXT: .LBB453_3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB453_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_64_const_brz: +; X64-NOBMI2: # %bb.0: # %entry +; 
X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB453_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: orq $16, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB453_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: jne .LBB453_4 +; X64-NOBMI2-NEXT: # %bb.3: # %if.then +; X64-NOBMI2-NEXT: movq 32(%rdi), %rcx +; X64-NOBMI2-NEXT: .LBB453_4: # %return +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_64_const_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB453_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: orq $16, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB453_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: jne .LBB453_4 +; X64-BMI2-NEXT: # %bb.3: # %if.then +; X64-BMI2-NEXT: movq 32(%rdi), %rcx +; X64-BMI2-NEXT: .LBB453_4: # %return +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i64 16 monotonic, align 8 + %and = and i64 %0, 32 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_or_64_const_brnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_or_64_const_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB454_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl $16, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB454_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testb $16, %al +; X86-NOBMI2-NEXT: jne .LBB454_4 +; X86-NOBMI2-NEXT: # %bb.3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB454_5 +; X86-NOBMI2-NEXT: .LBB454_4: # %if.then +; X86-NOBMI2-NEXT: movl 32(%esi), %eax +; X86-NOBMI2-NEXT: movl 36(%esi), %edx +; X86-NOBMI2-NEXT: .LBB454_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_or_64_const_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), 
%esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB454_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl $16, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB454_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testb $16, %al +; X86-BMI2-NEXT: jne .LBB454_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB454_5 +; X86-BMI2-NEXT: .LBB454_4: # %if.then +; X86-BMI2-NEXT: movl 32(%esi), %eax +; X86-BMI2-NEXT: movl 36(%esi), %edx +; X86-BMI2-NEXT: .LBB454_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_or_64_const_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsq $4, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: shlq $4, %rax +; X64-NOBMI2-NEXT: je .LBB454_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq 32(%rdi), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB454_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_or_64_const_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsq $4, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlq $4, %rax +; X64-BMI2-NEXT: je .LBB454_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq 32(%rdi), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB454_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i64 16 monotonic, align 8 + %and = and i64 %0, 16 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_or_64_const_brnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_or_64_const_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB455_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: orl $16, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB455_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: jne .LBB455_4 +; X86-NOBMI2-NEXT: # %bb.3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB455_5 +; X86-NOBMI2-NEXT: .LBB455_4: # %if.then +; X86-NOBMI2-NEXT: movl 32(%esi), %eax +; X86-NOBMI2-NEXT: movl 36(%esi), %edx +; X86-NOBMI2-NEXT: .LBB455_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_or_64_const_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB455_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: orl $16, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB455_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB455_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB455_5 +; X86-BMI2-NEXT: .LBB455_4: # %if.then +; X86-BMI2-NEXT: movl 32(%esi), %eax +; X86-BMI2-NEXT: movl 36(%esi), %edx +; X86-BMI2-NEXT: .LBB455_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_or_64_const_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB455_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: orq $16, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB455_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: je .LBB455_4 +; X64-NOBMI2-NEXT: # %bb.3: # %if.then +; X64-NOBMI2-NEXT: movq 32(%rdi), %rcx +; X64-NOBMI2-NEXT: .LBB455_4: # %return +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_or_64_const_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB455_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: orq $16, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB455_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: je .LBB455_4 +; X64-BMI2-NEXT: # %bb.3: # %if.then +; X64-BMI2-NEXT: movq 32(%rdi), %rcx +; X64-BMI2-NEXT: .LBB455_4: # %return +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw or ptr %v, i64 16 monotonic, align 8 + %and = and i64 %0, 32 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_and_64_const_val(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_64_const_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB456_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl $-17, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB456_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $16, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_64_const_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB456_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl $-17, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB456_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl $16, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_64_const_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrq $4, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: shlq $4, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_64_const_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrq $4, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlq $4, %rax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i64 -17 monotonic, align 8 + %and = and i64 %0, 16 + ret i64 %and +} + +define i64 @atomic_shl1_neq_and_64_const_val(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_64_const_val: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB457_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl $-17, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB457_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: andl $32, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; 
X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_64_const_val: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB457_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl $-17, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB457_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: andl $32, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_64_const_val: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB457_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: andq $-17, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB457_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: andl $32, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_64_const_val: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB457_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: andq $-17, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB457_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: andl $32, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i64 -17 monotonic, align 8 + %and = and i64 %0, 32 + ret i64 %and +} + +define i64 @atomic_shl1_and_64_const_valz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_64_const_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB458_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl $-17, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB458_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testb $16, %al +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_64_const_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl 
%esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB458_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl $-17, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB458_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $16, %al +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_64_const_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB458_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: andq $-17, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB458_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testb $16, %al +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_64_const_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB458_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: andq $-17, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB458_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testb $16, %al +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i64 -17 monotonic, align 8 + %1 = lshr i64 %0, 4 + %2 = and i64 %1, 1 + %lnot.ext = xor i64 %2, 1 + ret i64 %lnot.ext +} + +define i64 @atomic_shl1_neq_and_64_const_valz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_64_const_valz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB459_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl $-17, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB459_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: xorl %ecx, %ecx +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: sete %cl +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: 
atomic_shl1_neq_and_64_const_valz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB459_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl $-17, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB459_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: xorl %ecx, %ecx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: sete %cl +; X86-BMI2-NEXT: movl %ecx, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_64_const_valz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB459_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: andq $-17, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB459_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: xorl %ecx, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: sete %cl +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_64_const_valz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB459_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: andq $-17, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB459_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: xorl %ecx, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: sete %cl +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i64 -17 monotonic, align 8 + %1 = lshr i64 %0, 5 + %2 = and i64 %1, 1 + %lnot.ext = xor i64 %2, 1 + ret i64 %lnot.ext +} + +define i64 @atomic_shl1_and_64_const_valnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_64_const_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB460_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl $-17, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB460_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: shrl $4, %eax +; X86-NOBMI2-NEXT: andl $1, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_64_const_valnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB460_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl $-17, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB460_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrl $4, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_64_const_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB460_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: andq $-17, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB460_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: shrl $4, %eax +; X64-NOBMI2-NEXT: andl $1, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_64_const_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB460_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: andq $-17, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB460_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrl $4, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i64 -17 monotonic, align 8 + %1 = lshr i64 %0, 4 + %lnot.ext = and i64 %1, 1 + ret i64 %lnot.ext +} + +define i64 @atomic_shl1_neq_and_64_const_valnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_64_const_valnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB461_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl $-17, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB461_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: shrl $5, %eax +; X86-NOBMI2-NEXT: andl $1, %eax +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_64_const_valnz: +; X86-BMI2: # 
%bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB461_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl $-17, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB461_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: shrl $5, %eax +; X86-BMI2-NEXT: andl $1, %eax +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_64_const_valnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB461_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: andq $-17, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB461_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: shrl $5, %eax +; X64-NOBMI2-NEXT: andl $1, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_64_const_valnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB461_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: andq $-17, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB461_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: shrl $5, %eax +; X64-BMI2-NEXT: andl $1, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i64 -17 monotonic, align 8 + %1 = lshr i64 %0, 5 + %lnot.ext = and i64 %1, 1 + ret i64 %lnot.ext +} + +define i64 @atomic_shl1_and_64_const_br(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_64_const_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB462_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl $-17, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB462_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testb $16, %al +; X86-NOBMI2-NEXT: jne .LBB462_4 +; X86-NOBMI2-NEXT: # %bb.3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB462_5 +; X86-NOBMI2-NEXT: .LBB462_4: # %if.then +; X86-NOBMI2-NEXT: movl 32(%esi), %eax +; X86-NOBMI2-NEXT: movl 36(%esi), %edx +; X86-NOBMI2-NEXT: .LBB462_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; 
X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_64_const_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB462_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl $-17, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB462_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testb $16, %al +; X86-BMI2-NEXT: jne .LBB462_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB462_5 +; X86-BMI2-NEXT: .LBB462_4: # %if.then +; X86-BMI2-NEXT: movl 32(%esi), %eax +; X86-BMI2-NEXT: movl 36(%esi), %edx +; X86-BMI2-NEXT: .LBB462_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_64_const_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrq $4, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: shlq $4, %rax +; X64-NOBMI2-NEXT: je .LBB462_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq 32(%rdi), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB462_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_64_const_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrq $4, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlq $4, %rax +; X64-BMI2-NEXT: je .LBB462_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq 32(%rdi), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB462_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i64 -17 monotonic, align 8 + %and = and i64 %0, 16 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_and_64_const_br(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_64_const_br: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB463_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl $-17, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB463_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; 
X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: jne .LBB463_4 +; X86-NOBMI2-NEXT: # %bb.3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB463_5 +; X86-NOBMI2-NEXT: .LBB463_4: # %if.then +; X86-NOBMI2-NEXT: movl 32(%esi), %eax +; X86-NOBMI2-NEXT: movl 36(%esi), %edx +; X86-NOBMI2-NEXT: .LBB463_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_64_const_br: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB463_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl $-17, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB463_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB463_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB463_5 +; X86-BMI2-NEXT: .LBB463_4: # %if.then +; X86-BMI2-NEXT: movl 32(%esi), %eax +; X86-BMI2-NEXT: movl 36(%esi), %edx +; X86-BMI2-NEXT: .LBB463_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_64_const_br: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB463_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: andq $-17, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB463_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: je .LBB463_4 +; X64-NOBMI2-NEXT: # %bb.3: # %if.then +; X64-NOBMI2-NEXT: movq 32(%rdi), %rcx +; X64-NOBMI2-NEXT: .LBB463_4: # %return +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_64_const_br: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB463_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: andq $-17, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB463_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: je .LBB463_4 +; X64-BMI2-NEXT: # %bb.3: # %if.then +; X64-BMI2-NEXT: movq 32(%rdi), %rcx +; X64-BMI2-NEXT: .LBB463_4: # %return +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i64 -17 monotonic, align 8 + %and = and i64 %0, 32 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + 
%1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_and_64_const_brz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_64_const_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB464_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl $-17, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB464_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testb $16, %al +; X86-NOBMI2-NEXT: jne .LBB464_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl 32(%esi), %eax +; X86-NOBMI2-NEXT: movl 36(%esi), %edx +; X86-NOBMI2-NEXT: jmp .LBB464_5 +; X86-NOBMI2-NEXT: .LBB464_3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB464_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_64_const_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB464_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl $-17, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB464_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testb $16, %al +; X86-BMI2-NEXT: jne .LBB464_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl 32(%esi), %eax +; X86-BMI2-NEXT: movl 36(%esi), %edx +; X86-BMI2-NEXT: jmp .LBB464_5 +; X86-BMI2-NEXT: .LBB464_3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB464_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_64_const_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrq $4, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: shlq $4, %rax +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: je .LBB464_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB464_1: # %if.then +; X64-NOBMI2-NEXT: movq 32(%rdi), %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_64_const_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrq $4, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlq $4, %rax +; 
X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: je .LBB464_1 +; X64-BMI2-NEXT: # %bb.2: # %return +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB464_1: # %if.then +; X64-BMI2-NEXT: movq 32(%rdi), %rax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i64 -17 monotonic, align 8 + %and = and i64 %0, 16 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_and_64_const_brz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_64_const_brz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB465_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl $-17, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB465_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: jne .LBB465_3 +; X86-NOBMI2-NEXT: # %bb.4: # %if.then +; X86-NOBMI2-NEXT: movl 32(%esi), %eax +; X86-NOBMI2-NEXT: movl 36(%esi), %edx +; X86-NOBMI2-NEXT: jmp .LBB465_5 +; X86-NOBMI2-NEXT: .LBB465_3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: .LBB465_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_64_const_brz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB465_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl $-17, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB465_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB465_3 +; X86-BMI2-NEXT: # %bb.4: # %if.then +; X86-BMI2-NEXT: movl 32(%esi), %eax +; X86-BMI2-NEXT: movl 36(%esi), %edx +; X86-BMI2-NEXT: jmp .LBB465_5 +; X86-BMI2-NEXT: .LBB465_3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: .LBB465_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_64_const_brz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB465_1: # 
%atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: andq $-17, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB465_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: jne .LBB465_4 +; X64-NOBMI2-NEXT: # %bb.3: # %if.then +; X64-NOBMI2-NEXT: movq 32(%rdi), %rcx +; X64-NOBMI2-NEXT: .LBB465_4: # %return +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_64_const_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB465_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: andq $-17, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB465_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: jne .LBB465_4 +; X64-BMI2-NEXT: # %bb.3: # %if.then +; X64-BMI2-NEXT: movq 32(%rdi), %rcx +; X64-BMI2-NEXT: .LBB465_4: # %return +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i64 -17 monotonic, align 8 + %and = and i64 %0, 32 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %if.then, label %return + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_and_64_const_brnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_and_64_const_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB466_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl $-17, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB466_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testb $16, %al +; X86-NOBMI2-NEXT: jne .LBB466_4 +; X86-NOBMI2-NEXT: # %bb.3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB466_5 +; X86-NOBMI2-NEXT: .LBB466_4: # %if.then +; X86-NOBMI2-NEXT: movl 32(%esi), %eax +; X86-NOBMI2-NEXT: movl 36(%esi), %edx +; X86-NOBMI2-NEXT: .LBB466_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_and_64_const_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; 
X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB466_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl $-17, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB466_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testb $16, %al +; X86-BMI2-NEXT: jne .LBB466_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB466_5 +; X86-BMI2-NEXT: .LBB466_4: # %if.then +; X86-BMI2-NEXT: movl 32(%esi), %eax +; X86-BMI2-NEXT: movl 36(%esi), %edx +; X86-BMI2-NEXT: .LBB466_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_and_64_const_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrq $4, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: shlq $4, %rax +; X64-NOBMI2-NEXT: je .LBB466_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movq 32(%rdi), %rax +; X64-NOBMI2-NEXT: retq +; X64-NOBMI2-NEXT: .LBB466_1: +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_and_64_const_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrq $4, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlq $4, %rax +; X64-BMI2-NEXT: je .LBB466_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then +; X64-BMI2-NEXT: movq 32(%rdi), %rax +; X64-BMI2-NEXT: retq +; X64-BMI2-NEXT: .LBB466_1: +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i64 -17 monotonic, align 8 + %and = and i64 %0, 16 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +} + +define i64 @atomic_shl1_neq_and_64_const_brnz(ptr %v) { +; X86-NOBMI2-LABEL: atomic_shl1_neq_and_64_const_brnz: +; X86-NOBMI2: # %bb.0: # %entry +; X86-NOBMI2-NEXT: pushl %ebx +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-NOBMI2-NEXT: .cfi_offset %esi, -12 +; X86-NOBMI2-NEXT: .cfi_offset %ebx, -8 +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl (%esi), %eax +; X86-NOBMI2-NEXT: movl 4(%esi), %edx +; X86-NOBMI2-NEXT: .p2align 4, 0x90 +; X86-NOBMI2-NEXT: .LBB467_1: # %atomicrmw.start +; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI2-NEXT: movl %eax, %ebx +; X86-NOBMI2-NEXT: andl $-17, %ebx +; X86-NOBMI2-NEXT: movl %edx, %ecx +; X86-NOBMI2-NEXT: lock cmpxchg8b (%esi) +; X86-NOBMI2-NEXT: jne .LBB467_1 +; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-NOBMI2-NEXT: testb $32, %al +; X86-NOBMI2-NEXT: jne .LBB467_4 +; X86-NOBMI2-NEXT: # %bb.3: +; X86-NOBMI2-NEXT: xorl %edx, %edx +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jmp .LBB467_5 +; X86-NOBMI2-NEXT: .LBB467_4: # %if.then +; X86-NOBMI2-NEXT: movl 32(%esi), %eax +; X86-NOBMI2-NEXT: movl 36(%esi), %edx +; X86-NOBMI2-NEXT: .LBB467_5: # %return +; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-NOBMI2-NEXT: popl %ebx +; X86-NOBMI2-NEXT: 
.cfi_def_cfa_offset 4 +; X86-NOBMI2-NEXT: retl +; +; X86-BMI2-LABEL: atomic_shl1_neq_and_64_const_brnz: +; X86-BMI2: # %bb.0: # %entry +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 12 +; X86-BMI2-NEXT: .cfi_offset %esi, -12 +; X86-BMI2-NEXT: .cfi_offset %ebx, -8 +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: movl (%esi), %eax +; X86-BMI2-NEXT: movl 4(%esi), %edx +; X86-BMI2-NEXT: .p2align 4, 0x90 +; X86-BMI2-NEXT: .LBB467_1: # %atomicrmw.start +; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI2-NEXT: movl %eax, %ebx +; X86-BMI2-NEXT: andl $-17, %ebx +; X86-BMI2-NEXT: movl %edx, %ecx +; X86-BMI2-NEXT: lock cmpxchg8b (%esi) +; X86-BMI2-NEXT: jne .LBB467_1 +; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: jne .LBB467_4 +; X86-BMI2-NEXT: # %bb.3: +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jmp .LBB467_5 +; X86-BMI2-NEXT: .LBB467_4: # %if.then +; X86-BMI2-NEXT: movl 32(%esi), %eax +; X86-BMI2-NEXT: movl 36(%esi), %edx +; X86-BMI2-NEXT: .LBB467_5: # %return +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: .cfi_def_cfa_offset 8 +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: .cfi_def_cfa_offset 4 +; X86-BMI2-NEXT: retl +; +; X64-NOBMI2-LABEL: atomic_shl1_neq_and_64_const_brnz: +; X64-NOBMI2: # %bb.0: # %entry +; X64-NOBMI2-NEXT: movq (%rdi), %rax +; X64-NOBMI2-NEXT: .p2align 4, 0x90 +; X64-NOBMI2-NEXT: .LBB467_1: # %atomicrmw.start +; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI2-NEXT: movq %rax, %rcx +; X64-NOBMI2-NEXT: andq $-17, %rcx +; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-NOBMI2-NEXT: jne .LBB467_1 +; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-NOBMI2-NEXT: movl $123, %ecx +; X64-NOBMI2-NEXT: testb $32, %al +; X64-NOBMI2-NEXT: je .LBB467_4 +; X64-NOBMI2-NEXT: # %bb.3: # %if.then +; X64-NOBMI2-NEXT: movq 32(%rdi), %rcx +; X64-NOBMI2-NEXT: .LBB467_4: # %return +; X64-NOBMI2-NEXT: movq %rcx, %rax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_neq_and_64_const_brnz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movq (%rdi), %rax +; X64-BMI2-NEXT: .p2align 4, 0x90 +; X64-BMI2-NEXT: .LBB467_1: # %atomicrmw.start +; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI2-NEXT: movq %rax, %rcx +; X64-BMI2-NEXT: andq $-17, %rcx +; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi) +; X64-BMI2-NEXT: jne .LBB467_1 +; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end +; X64-BMI2-NEXT: movl $123, %ecx +; X64-BMI2-NEXT: testb $32, %al +; X64-BMI2-NEXT: je .LBB467_4 +; X64-BMI2-NEXT: # %bb.3: # %if.then +; X64-BMI2-NEXT: movq 32(%rdi), %rcx +; X64-BMI2-NEXT: .LBB467_4: # %return +; X64-BMI2-NEXT: movq %rcx, %rax +; X64-BMI2-NEXT: retq +entry: + %0 = atomicrmw and ptr %v, i64 -17 monotonic, align 8 + %and = and i64 %0, 32 + %tobool.not = icmp eq i64 %and, 0 + br i1 %tobool.not, label %return, label %if.then + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds i64, ptr %v, i64 4 + %1 = load i64, ptr %arrayidx, align 8 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i64 [ %1, %if.then ], [ 123, %entry ] + ret i64 %retval.0 +}
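+
+; Illustrative note (a sketch summarizing the tests above, not
+; FileCheck-generated output): the const_brz/const_brnz pairs pin down when
+; the lock-bt* lowering applies. The transform fires only when the bit being
+; tested is the same single bit the atomicrmw modifies:
+;
+;   %old = atomicrmw and ptr %v, i64 -17 monotonic, align 8 ; clears bit 4
+;   %bit = and i64 %old, 16                                 ; tests bit 4
+;   ; => lock btrq $4, (%rdi) with the old bit value read from CF (setb/jcc)
+;
+; The `_neq_` variants test bit 5 (mask 32) while clearing bit 4 (mask -17),
+; so the tested bit's old value cannot be recovered from CF and the cmpxchg
+; loop is kept. The 32-bit X86 targets likewise keep the cmpxchg8b loop for
+; i64 operands, since no 64-bit lock bt* form is available there.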