diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -70,6 +70,14 @@
                                      [ImmArg<ArgIndex<1>>]>;
   def int_x86_atomic_btr : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
                                      [ImmArg<ArgIndex<1>>]>;
+  def int_x86_atomic_bts_rm : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty],
+                                        []>;
+  def int_x86_atomic_btc_rm : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty],
+                                        []>;
+  def int_x86_atomic_btr_rm : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty],
+                                        []>;
+
+
 }

 // Lock binary arith with CC.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -799,6 +799,9 @@
     LBTS,
     LBTC,
     LBTR,
+    LBTS_RM,
+    LBTC_RM,
+    LBTR_RM,

     /// RAO arithmetic instructions.
     /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5642,6 +5642,9 @@
     return true;
   case Intrinsic::x86_cmpccxadd32:
   case Intrinsic::x86_cmpccxadd64:
+  case Intrinsic::x86_atomic_bts_rm:
+  case Intrinsic::x86_atomic_btc_rm:
+  case Intrinsic::x86_atomic_btr_rm:
  case Intrinsic::x86_atomic_bts:
  case Intrinsic::x86_atomic_btc:
  case Intrinsic::x86_atomic_btr: {
@@ -5654,6 +5657,7 @@
                  MachineMemOperand::MOVolatile;
     return true;
   }
+
   case Intrinsic::x86_aadd32:
   case Intrinsic::x86_aadd64:
   case Intrinsic::x86_aand32:
@@ -28358,6 +28362,25 @@
     return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
                        Operation.getValue(1));
   }
+  case Intrinsic::x86_atomic_bts_rm:
+  case Intrinsic::x86_atomic_btc_rm:
+  case Intrinsic::x86_atomic_btr_rm: {
+    SDLoc DL(Op);
+    MVT VT = Op.getSimpleValueType();
+    SDValue Chain = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(2);
+    SDValue Op2 = Op.getOperand(3);
+    unsigned Opc = IntNo == Intrinsic::x86_atomic_bts_rm   ? X86ISD::LBTS_RM
+                   : IntNo == Intrinsic::x86_atomic_btc_rm ? X86ISD::LBTC_RM
+                                                           : X86ISD::LBTR_RM;
+    MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
+    SDValue Res =
+        DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::Other),
+                                {Chain, Op1, Op2}, VT, MMO);
+    Chain = Res.getValue(1);
+    Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT);
+    return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain);
+  }
  case Intrinsic::x86_atomic_bts:
  case Intrinsic::x86_atomic_btc:
  case Intrinsic::x86_atomic_btr: {
@@ -28369,6 +28392,7 @@
     unsigned Opc = IntNo == Intrinsic::x86_atomic_bts   ? X86ISD::LBTS
                    : IntNo == Intrinsic::x86_atomic_btc ? X86ISD::LBTC
                                                         : X86ISD::LBTR;
+    SDValue Size = DAG.getConstant(VT.getScalarSizeInBits(), DL, MVT::i32);
     MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
     SDValue Res =
@@ -31395,6 +31419,96 @@
              : AtomicExpansionKind::None;
 }

+enum BitTestKind : unsigned {
+  UndefBit,
+  ConstantBit,
+  NotConstantBit,
+  ShiftBit,
+  NotShiftBit
+};
+
+static std::pair<Value *, BitTestKind> FindSingleBitChange(Value *V) {
+  BitTestKind BTK = UndefBit;
+  auto *C = dyn_cast<ConstantInt>(V);
+  if (C) {
+    // Check if V is a power of 2 or a NOT of a power of 2.
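+    // For example, C == 0x0010 (1 << 4) is ConstantBit and C == 0xffef
+    // (i16 ~(1 << 4), as produced by an `and` mask) is NotConstantBit;
+    // values that change more than one bit stay UndefBit.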
+    if (isPowerOf2_64(C->getZExtValue())) {
+      BTK = ConstantBit;
+    } else if (isPowerOf2_64((~C->getValue()).getZExtValue())) {
+      BTK = NotConstantBit;
+    }
+    return {V, BTK};
+  }
+
+  // Check if V is some power of 2 pattern known to be non-zero.
+  auto *I = dyn_cast<Instruction>(V);
+  if (I) {
+    bool Not = false;
+    // Check if we have a NOT.
+    if (I->getOpcode() == Instruction::Sub ||
+        I->getOpcode() == Instruction::Xor) {
+
+      auto *OpC0 = dyn_cast<ConstantInt>(I->getOperand(0));
+      auto *OpC1 = dyn_cast<ConstantInt>(I->getOperand(1));
+      // Check if this is a NOT instruction: `-1 - X` or `X ^ -1` / `-1 ^ X`.
+      if (!OpC0 && (!OpC1 || I->getOpcode() == Instruction::Sub))
+        return {nullptr, UndefBit};
+
+      auto *MaybeNeg1 = OpC0 ? OpC0 : OpC1;
+      if (!MaybeNeg1->isMinusOne())
+        return {nullptr, UndefBit};
+
+      auto *OpI0 = dyn_cast<Instruction>(I->getOperand(0));
+      auto *OpI1 = dyn_cast<Instruction>(I->getOperand(1));
+
+      assert(OpI0 != nullptr || OpI1 != nullptr);
+      assert(OpI0 == nullptr || OpI1 == nullptr);
+
+      I = OpI0 ? OpI0 : OpI1;
+      Not = true;
+    }
+    // We can only use 1 << X without more sophisticated analysis. C << X
+    // where C is a power of 2 but not 1 can result in zero, which cannot be
+    // translated to a bittest. Likewise any C >> X (either arith or logical)
+    // can be zero.
+    if (I->getOpcode() == Instruction::Shl) {
+      // Todo(1): The cmpxchg case is pretty costly, so matching `BLSI(X)`,
+      // `X & -X` and some other provable power of 2 patterns that we can use
+      // CTZ on may be profitable.
+      // Todo(2): It may be possible in some cases to prove that Shl(C, X) is
+      // non-zero even where C != 1. Likewise LShr(C, X) and AShr(C, X) may
+      // also be provably a non-zero power of 2.
+      // Todo(3): ROTL and ROTR patterns on a power of 2 C should also be
+      // transformable to a bittest.
+      auto *ShiftVal = dyn_cast<ConstantInt>(I->getOperand(0));
+      if (!ShiftVal)
+        return {nullptr, UndefBit};
+      if (ShiftVal->equalsInt(1))
+        BTK = Not ? NotShiftBit : ShiftBit;
+
+      if (BTK == UndefBit)
+        return {nullptr, UndefBit};
+
+      Value *BitV = I->getOperand(1);
+      if (auto *I1 = dyn_cast<Instruction>(BitV)) {
+        // Read past a shift-mask instruction to find the count.
+        if (I1->getOpcode() == Instruction::And) {
+          auto *OpC0 = dyn_cast<ConstantInt>(I1->getOperand(0));
+          auto *OpC1 = dyn_cast<ConstantInt>(I1->getOperand(1));
+          if (OpC0 || OpC1) {
+            assert(OpC0 == nullptr || OpC1 == nullptr);
+            auto *C1 = OpC0 ? OpC0 : OpC1;
+            if (C1->equalsInt(I->getType()->getPrimitiveSizeInBits() - 1))
+              BitV = OpC0 ? I1->getOperand(1) : I1->getOperand(0);
+          }
+        }
+      }
+
+      return {BitV, BTK};
+    }
+  }
+  return {nullptr, UndefBit};
+}
+
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
   // If the atomicrmw's result isn't actually used, we can just add a "lock"
@@ -31404,51 +31518,138 @@
   // If the atomicrmw's result is used by a single bit AND, we may use
   // bts/btr/btc instruction for these operations.
-  auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
+  // Note: InstCombinePass can cause a de-optimization here. It replaces the
+  // SETCC(And(AtomicRMW(P, power_of_2), power_of_2)) with LShr and Xor
+  // (depending on CC). This pattern can only use bts/btr/btc but we don't
+  // detect it.
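+  // For instance, `(and (atomicrmw or P, 8), 8) != 0` may be rewritten into
+  // a test of `lshr (atomicrmw or P, 8), 3`, which the single-bit AND match
+  // below does not recognize (illustrative; see the note above).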
   Instruction *I = AI->user_back();
-  if (!C1 || !AI->hasOneUse() || I->getOpcode() != Instruction::And ||
+  auto BitChange = FindSingleBitChange(AI->getValOperand());
+  if (!BitChange.first || BitChange.second == UndefBit || !AI->hasOneUse() ||
+      I->getOpcode() != Instruction::And ||
+      AI->getType()->getPrimitiveSizeInBits() == 8 ||
       AI->getParent() != I->getParent())
     return AtomicExpansionKind::CmpXChg;
+
+  assert(I->getOperand(0) == AI);

   // The following instruction must be an AND with a single bit.
-  auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
-  unsigned Bits = AI->getType()->getPrimitiveSizeInBits();
-  if (!C2 || Bits == 8 || !isPowerOf2_64(C2->getZExtValue()))
+  if (BitChange.second == ConstantBit || BitChange.second == NotConstantBit) {
+    auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
+    assert(C1 != nullptr);
+    auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
+    if (!C2 || !isPowerOf2_64(C2->getZExtValue())) {
+      return AtomicExpansionKind::CmpXChg;
+    }
+    if (AI->getOperation() == AtomicRMWInst::And) {
+      return ~C1->getValue() == C2->getValue()
+                 ? AtomicExpansionKind::BitTestIntrinsic
+                 : AtomicExpansionKind::CmpXChg;
+    }
+    return C1 == C2 ? AtomicExpansionKind::BitTestIntrinsic
+                    : AtomicExpansionKind::CmpXChg;
+  }
+
+  assert(BitChange.second == ShiftBit || BitChange.second == NotShiftBit);
+
+  auto BitTested = FindSingleBitChange(I->getOperand(1));
+  if (BitTested.second != ShiftBit && BitTested.second != NotShiftBit)
+    return AtomicExpansionKind::CmpXChg;
+
+  assert(BitTested.first != nullptr);
+
+  // If the shift amounts are not the same we can't use BitTestIntrinsic.
+  if (BitChange.first != BitTested.first)
     return AtomicExpansionKind::CmpXChg;

+  // For atomic AND, the RMW must mask off exactly one bit and the AND must
+  // test the single bit that is unset in that mask.
   if (AI->getOperation() == AtomicRMWInst::And)
-    return ~C1->getValue() == C2->getValue()
+    return (BitChange.second == NotShiftBit && BitTested.second == ShiftBit)
                ? AtomicExpansionKind::BitTestIntrinsic
                : AtomicExpansionKind::CmpXChg;

-  return C1 == C2 ? AtomicExpansionKind::BitTestIntrinsic
-                  : AtomicExpansionKind::CmpXChg;
+  // For atomic XOR/OR, the RMW and the AND must set and test the same bit.
+  return (BitChange.second == ShiftBit && BitTested.second == ShiftBit)
+             ? AtomicExpansionKind::BitTestIntrinsic
+             : AtomicExpansionKind::CmpXChg;
 }
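+// For the variable-shift patterns accepted above, the rewrite emitted by
+// emitBitTestAtomicRMWIntrinsic looks roughly like this for a 32-bit
+// `atomicrmw or` (illustrative IR only):
+//   %pos = and i32 %c, 31
+//   %cf  = call i8 @llvm.x86.atomic.bts.rm.i32(ptr %addr, i32 %pos)
+//   %bit = zext i8 %cf to i32
+//   %val = shl i32 %bit, %pos   ; skipped when only compared against zero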

 void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
   IRBuilder<> Builder(AI);
-  Intrinsic::ID IID = Intrinsic::not_intrinsic;
+  Intrinsic::ID IID_C = Intrinsic::not_intrinsic;
+  Intrinsic::ID IID_I = Intrinsic::not_intrinsic;
   switch (AI->getOperation()) {
   default:
     llvm_unreachable("Unknown atomic operation");
   case AtomicRMWInst::Or:
-    IID = Intrinsic::x86_atomic_bts;
+    IID_C = Intrinsic::x86_atomic_bts;
+    IID_I = Intrinsic::x86_atomic_bts_rm;
     break;
   case AtomicRMWInst::Xor:
-    IID = Intrinsic::x86_atomic_btc;
+    IID_C = Intrinsic::x86_atomic_btc;
+    IID_I = Intrinsic::x86_atomic_btc_rm;
     break;
   case AtomicRMWInst::And:
-    IID = Intrinsic::x86_atomic_btr;
+    IID_C = Intrinsic::x86_atomic_btr;
+    IID_I = Intrinsic::x86_atomic_btr_rm;
     break;
   }
   Instruction *I = AI->user_back();
   LLVMContext &Ctx = AI->getContext();
-  unsigned Imm =
-      countTrailingZeros(cast<ConstantInt>(I->getOperand(1))->getZExtValue());
-  Function *BitTest =
-      Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
   Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(),
                                           Type::getInt8PtrTy(Ctx));
-  Value *Result = Builder.CreateCall(BitTest, {Addr, Builder.getInt8(Imm)});
+  Function *BitTest = nullptr;
+  Value *Result = nullptr;
+  auto BitTested = FindSingleBitChange(AI->getValOperand());
+  assert(BitTested.first != nullptr);
+  if (BitTested.second == ConstantBit || BitTested.second == NotConstantBit) {
+    auto *C = dyn_cast<ConstantInt>(I->getOperand(1));
+    assert(C != nullptr);
+
+    BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_C, AI->getType());
+
+    unsigned Imm = countTrailingZeros(C->getZExtValue());
+    Result = Builder.CreateCall(BitTest, {Addr, Builder.getInt8(Imm)});
+  } else {
+    BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_I, AI->getType());
+
+    assert(BitTested.second == ShiftBit || BitTested.second == NotShiftBit);
+
+    Value *SI = BitTested.first;
+    assert(SI != nullptr);
+
+    // BT{S|R|C} with a memory operand does not modulo the bit position, so we
+    // need to mask it.
+    unsigned ShiftBits = SI->getType()->getPrimitiveSizeInBits();
+    Value *BitPos =
+        Builder.CreateAnd(SI, Builder.getIntN(ShiftBits, ShiftBits - 1));
+    // Todo(1): In many cases it may be provable that SI is less than
+    // ShiftBits, in which case this mask is unnecessary.
+    // Todo(2): In the fairly idiomatic case of P[X / sizeof_bits(X)] OP 1
+    // << (X % sizeof_bits(X)) we can drop the shift mask and AGEN in
+    // favor of just a raw BT{S|R|C}.
+
+    Result = Builder.CreateCall(BitTest, {Addr, BitPos});
+    Result = Builder.CreateZExtOrTrunc(Result, AI->getType());
+
+    // If the result is only used for zero/non-zero status then we don't need
+    // to shift the value back. Otherwise do so.
+    for (auto It = I->user_begin(); It != I->user_end(); ++It) {
+      if (auto *ICmp = dyn_cast<ICmpInst>(*It)) {
+        if (ICmp->isEquality()) {
+          auto *C0 = dyn_cast<ConstantInt>(ICmp->getOperand(0));
+          auto *C1 = dyn_cast<ConstantInt>(ICmp->getOperand(1));
+          if (C0 || C1) {
+            assert(C0 == nullptr || C1 == nullptr);
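+            // An equality compare against zero consumes only the 0/1 value
+            // derived from CF, so shifting back to the original bit
+            // position is unnecessary for this user.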
+            if ((C0 ? C0 : C1)->isZero())
+              continue;
+          }
+        }
+      }
+      Result = Builder.CreateShl(Result, BitPos);
+      break;
+    }
+  }
+
   I->replaceAllUsesWith(Result);
   I->eraseFromParent();
   AI->eraseFromParent();
@@ -34221,6 +34422,9 @@
     NODE_NAME_CASE(LBTS)
     NODE_NAME_CASE(LBTC)
     NODE_NAME_CASE(LBTR)
+    NODE_NAME_CASE(LBTS_RM)
+    NODE_NAME_CASE(LBTC_RM)
+    NODE_NAME_CASE(LBTR_RM)
     NODE_NAME_CASE(AADD)
     NODE_NAME_CASE(AOR)
     NODE_NAME_CASE(AXOR)
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -869,6 +869,17 @@
 def x86btr : SDNode<"X86ISD::LBTR", X86LBTest,
                     [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;

+def X86LBTestRM : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
+                                       SDTCisInt<2>]>;
+
+def x86_rm_bts : SDNode<"X86ISD::LBTS_RM", X86LBTestRM,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def x86_rm_btc : SDNode<"X86ISD::LBTC_RM", X86LBTestRM,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+def x86_rm_btr : SDNode<"X86ISD::LBTR_RM", X86LBTestRM,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
+
+
 multiclass ATOMIC_LOGIC_OP<Format Form, string s> {
   let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
       SchedRW = [WriteBitTestSetRegRMW] in {
@@ -887,10 +898,33 @@
   }
 }

+multiclass ATOMIC_LOGIC_OP_RM<bits<8> Opc8, string s> {
+  let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+      SchedRW = [WriteBitTestSetRegRMW] in {
+    def 16rm : Ii8<Opc8, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+                   !strconcat(s, "{w}\t{$src2, $src1|$src1, $src2}"),
+                   [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1,
+                                  GR16:$src2))]>,
+               OpSize16, TB, LOCK;
+    def 32rm : Ii8<Opc8, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+                   !strconcat(s, "{l}\t{$src2, $src1|$src1, $src2}"),
+                   [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1,
+                                  GR32:$src2))]>,
+               OpSize32, TB, LOCK;
+    def 64rm : RIi8<Opc8, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                    !strconcat(s, "{q}\t{$src2, $src1|$src1, $src2}"),
+                    [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1,
+                                   GR64:$src2))]>,
+                TB, LOCK;
+  }
+}
+
+
 defm LOCK_BTS : ATOMIC_LOGIC_OP<MRM5m, "bts">;
 defm LOCK_BTC : ATOMIC_LOGIC_OP<MRM7m, "btc">;
 defm LOCK_BTR : ATOMIC_LOGIC_OP<MRM6m, "btr">;

+defm LOCK_BTS_RM : ATOMIC_LOGIC_OP_RM<0xAB, "bts">;
+defm LOCK_BTC_RM : ATOMIC_LOGIC_OP_RM<0xBB, "btc">;
+defm LOCK_BTR_RM : ATOMIC_LOGIC_OP_RM<0xB3, "btr">;
+
 // Atomic compare and swap.
multiclass LCMPXCHG_BinOp Opc8, bits<8> Opc, Format Form, string mnemonic, SDPatternOperator frag> { diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll --- a/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll +++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test.ll @@ -1314,95 +1314,48 @@ define zeroext i16 @atomic_shl1_small_mask_xor_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_small_mask_xor_16_gpr_val: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: andb $7, %cl -; X86-NOBMI2-NEXT: movl $1, %esi -; X86-NOBMI2-NEXT: shll %cl, %esi -; X86-NOBMI2-NEXT: movzwl (%edx), %eax -; X86-NOBMI2-NEXT: movzwl %si, %ecx -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB13_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %esi -; X86-NOBMI2-NEXT: xorl %ecx, %esi -; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NOBMI2-NEXT: lock cmpxchgw %si, (%edx) -; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax -; X86-NOBMI2-NEXT: jne .LBB13_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: andl %ecx, %eax +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $7, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btcw %cx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NOBMI2-NEXT: popl %esi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_small_mask_xor_16_gpr_val: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: andb $7, %al -; X86-BMI2-NEXT: movl $1, %edx -; X86-BMI2-NEXT: shlxl %eax, %edx, %edx -; X86-BMI2-NEXT: movzwl (%ecx), %eax -; X86-BMI2-NEXT: movzwl %dx, %edx -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB13_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %esi -; X86-BMI2-NEXT: xorl %edx, %esi -; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X86-BMI2-NEXT: lock cmpxchgw %si, (%ecx) -; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax -; X86-BMI2-NEXT: jne .LBB13_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $7, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btcw %cx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X86-BMI2-NEXT: popl %esi ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_16_gpr_val: ; X64-NOBMI2: # %bb.0: # %entry ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: andb $7, %cl -; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: andl $7, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcw %cx, (%rdi) +; X64-NOBMI2-NEXT: setb %al ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movzwl (%rdi), %eax -; X64-NOBMI2-NEXT: movzwl %dx, %ecx -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB13_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # 
=>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %edx -; X64-NOBMI2-NEXT: xorl %ecx, %edx -; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NOBMI2-NEXT: lock cmpxchgw %dx, (%rdi) -; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax -; X64-NOBMI2-NEXT: jne .LBB13_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: andl %ecx, %eax +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_16_gpr_val: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: andb $7, %sil -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movzwl (%rdi), %eax -; X64-BMI2-NEXT: movzwl %cx, %ecx -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB13_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: xorl %ecx, %edx -; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) -; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax -; X64-BMI2-NEXT: jne .LBB13_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: andl $7, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcw %si, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax ; X64-BMI2-NEXT: retq entry: @@ -1647,91 +1600,48 @@ define zeroext i16 @atomic_shl1_mask01_xor_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_val: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %esi -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: andb $15, %cl -; X86-NOBMI2-NEXT: movl $1, %edx -; X86-NOBMI2-NEXT: shll %cl, %edx -; X86-NOBMI2-NEXT: movzwl (%esi), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB16_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %ecx -; X86-NOBMI2-NEXT: xorl %edx, %ecx -; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi) -; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax -; X86-NOBMI2-NEXT: jne .LBB16_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btcw %cx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NOBMI2-NEXT: popl %esi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_val: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: andb $15, %al -; X86-BMI2-NEXT: movl $1, %edx -; X86-BMI2-NEXT: shlxl %eax, %edx, %edx -; X86-BMI2-NEXT: movzwl (%ecx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB16_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %esi -; X86-BMI2-NEXT: xorl %edx, %esi -; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X86-BMI2-NEXT: lock cmpxchgw %si, 
(%ecx) -; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax -; X86-BMI2-NEXT: jne .LBB16_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: andl %edx, %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btcw %cx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X86-BMI2-NEXT: popl %esi ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_val: ; X64-NOBMI2: # %bb.0: # %entry ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: andb $15, %cl -; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btcw %cx, (%rdi) +; X64-NOBMI2-NEXT: setb %al ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movzwl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB16_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %ecx -; X64-NOBMI2-NEXT: xorl %edx, %ecx -; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) -; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax -; X64-NOBMI2-NEXT: jne .LBB16_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_mask01_xor_16_gpr_val: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: andb $15, %sil -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movzwl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB16_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: xorl %ecx, %edx -; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X64-BMI2-NEXT: lock cmpxchgw %dx, (%rdi) -; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax -; X64-BMI2-NEXT: jne .LBB16_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btcw %si, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax ; X64-BMI2-NEXT: retq entry: @@ -4194,111 +4104,49 @@ define zeroext i16 @atomic_shl1_small_mask_and_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_small_mask_and_16_gpr_val: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: andb $7, %cl -; X86-NOBMI2-NEXT: movl $1, %esi -; X86-NOBMI2-NEXT: shll %cl, %esi -; X86-NOBMI2-NEXT: movw $-2, %di -; X86-NOBMI2-NEXT: rolw %cl, %di -; X86-NOBMI2-NEXT: movzwl (%edx), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB37_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %ecx -; X86-NOBMI2-NEXT: andl %edi, %ecx +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $7, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btrw %cx, (%edx) +; X86-NOBMI2-NEXT: setb 
%al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%edx) -; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax -; X86-NOBMI2-NEXT: jne .LBB37_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: movzwl %si, %ecx -; X86-NOBMI2-NEXT: andl %eax, %ecx -; X86-NOBMI2-NEXT: movl %ecx, %eax -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_small_mask_and_16_gpr_val: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: andb $7, %cl -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi -; X86-BMI2-NEXT: movw $-2, %di -; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-BMI2-NEXT: rolw %cl, %di -; X86-BMI2-NEXT: movzwl (%edx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB37_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %ecx -; X86-BMI2-NEXT: andl %edi, %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $7, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btrw %cx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X86-BMI2-NEXT: lock cmpxchgw %cx, (%edx) -; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax -; X86-BMI2-NEXT: jne .LBB37_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: movzwl %si, %ecx -; X86-BMI2-NEXT: andl %eax, %ecx -; X86-BMI2-NEXT: movl %ecx, %eax -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_16_gpr_val: ; X64-NOBMI2: # %bb.0: # %entry ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: andb $7, %cl -; X64-NOBMI2-NEXT: movl $1, %edx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movw $-2, %si +; X64-NOBMI2-NEXT: andl $7, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrw %cx, (%rdi) +; X64-NOBMI2-NEXT: setb %al ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: rolw %cl, %si -; X64-NOBMI2-NEXT: movzwl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB37_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %ecx -; X64-NOBMI2-NEXT: andl %esi, %ecx +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) -; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax -; X64-NOBMI2-NEXT: jne .LBB37_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: movzwl %dx, %ecx -; X64-NOBMI2-NEXT: andl %eax, %ecx -; X64-NOBMI2-NEXT: movl %ecx, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_small_mask_and_16_gpr_val: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl %esi, %ecx -; X64-BMI2-NEXT: andb $7, %cl -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %ecx, %eax, %edx -; X64-BMI2-NEXT: movw $-2, %si -; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI2-NEXT: rolw %cl, %si -; X64-BMI2-NEXT: movzwl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB37_1: # %atomicrmw.start -; 
X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %ecx -; X64-BMI2-NEXT: andl %esi, %ecx +; X64-BMI2-NEXT: andl $7, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrw %si, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) -; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax -; X64-BMI2-NEXT: jne .LBB37_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: movzwl %dx, %ecx -; X64-BMI2-NEXT: andl %eax, %ecx -; X64-BMI2-NEXT: movl %ecx, %eax ; X64-BMI2-NEXT: retq entry: %0 = and i16 %c, 7 @@ -4540,109 +4388,48 @@ define zeroext i16 @atomic_shl1_mask01_and_16_gpr_val(ptr %v, i16 zeroext %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_val: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NOBMI2-NEXT: movl %eax, %ecx -; X86-NOBMI2-NEXT: andb $15, %cl -; X86-NOBMI2-NEXT: movl $1, %edx -; X86-NOBMI2-NEXT: shll %cl, %edx -; X86-NOBMI2-NEXT: movw $-2, %di -; X86-NOBMI2-NEXT: movl %eax, %ecx -; X86-NOBMI2-NEXT: rolw %cl, %di -; X86-NOBMI2-NEXT: movzwl (%esi), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB40_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %ecx -; X86-NOBMI2-NEXT: andl %edi, %ecx -; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NOBMI2-NEXT: lock cmpxchgw %cx, (%esi) -; X86-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax -; X86-NOBMI2-NEXT: jne .LBB40_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: andl %edx, %eax +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btrw %cx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_val: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movl %ecx, %eax -; X86-BMI2-NEXT: andb $15, %al -; X86-BMI2-NEXT: movl $1, %esi -; X86-BMI2-NEXT: shlxl %eax, %esi, %esi -; X86-BMI2-NEXT: movw $-2, %di -; X86-BMI2-NEXT: rolw %cl, %di -; X86-BMI2-NEXT: movzwl (%edx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB40_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %ecx -; X86-BMI2-NEXT: andl %edi, %ecx -; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X86-BMI2-NEXT: lock cmpxchgw %cx, (%edx) -; X86-BMI2-NEXT: # kill: def $ax killed $ax def $eax -; X86-BMI2-NEXT: jne .LBB40_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: andl %esi, %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btrw %cx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 
-; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_16_gpr_val: ; X64-NOBMI2: # %bb.0: # %entry ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: andb $15, %cl -; X64-NOBMI2-NEXT: movl $1, %edx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movw $-2, %r8w -; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: rolw %cl, %r8w -; X64-NOBMI2-NEXT: movzwl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB40_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %ecx -; X64-NOBMI2-NEXT: andl %r8d, %ecx -; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NOBMI2-NEXT: lock cmpxchgw %cx, (%rdi) -; X64-NOBMI2-NEXT: # kill: def $ax killed $ax def $eax -; X64-NOBMI2-NEXT: jne .LBB40_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btrw %cx, (%rdi) +; X64-NOBMI2-NEXT: setb %al +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_mask01_and_16_gpr_val: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl %esi, %ecx -; X64-BMI2-NEXT: movl %ecx, %eax -; X64-BMI2-NEXT: andb $15, %al -; X64-BMI2-NEXT: movl $1, %edx -; X64-BMI2-NEXT: shlxl %eax, %edx, %edx -; X64-BMI2-NEXT: movw $-2, %si -; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI2-NEXT: rolw %cl, %si -; X64-BMI2-NEXT: movzwl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB40_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %ecx -; X64-BMI2-NEXT: andl %esi, %ecx -; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax -; X64-BMI2-NEXT: lock cmpxchgw %cx, (%rdi) -; X64-BMI2-NEXT: # kill: def $ax killed $ax def $eax -; X64-BMI2-NEXT: jne .LBB40_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: andl %edx, %eax +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btrw %si, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax ; X64-BMI2-NEXT: retq entry: @@ -6791,76 +6578,45 @@ define i32 @atomic_shl1_or_32_gpr_val(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_or_32_gpr_val: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %esi -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl $1, %edx -; X86-NOBMI2-NEXT: shll %cl, %edx -; X86-NOBMI2-NEXT: movl (%esi), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB60_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %ecx -; X86-NOBMI2-NEXT: orl %edx, %ecx -; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%esi) -; X86-NOBMI2-NEXT: jne .LBB60_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: andl %edx, %eax -; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btsl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax ; 
X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_or_32_gpr_val: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movl $1, %edx -; X86-BMI2-NEXT: shlxl %eax, %edx, %edx -; X86-BMI2-NEXT: movl (%ecx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB60_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %esi -; X86-BMI2-NEXT: orl %edx, %esi -; X86-BMI2-NEXT: lock cmpxchgl %esi, (%ecx) -; X86-BMI2-NEXT: jne .LBB60_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: andl %edx, %eax -; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_or_32_gpr_val: ; X64-NOBMI2: # %bb.0: # %entry ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB60_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %ecx -; X64-NOBMI2-NEXT: orl %edx, %ecx -; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB60_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_or_32_gpr_val: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB60_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: orl %ecx, %edx -; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-BMI2-NEXT: jne .LBB60_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: retq entry: %shl = shl nuw i32 1, %c @@ -6872,80 +6628,45 @@ define i32 @atomic_shl1_small_mask_or_32_gpr_val(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_val: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: andb $15, %cl -; X86-NOBMI2-NEXT: movl $1, %esi -; X86-NOBMI2-NEXT: shll %cl, %esi -; X86-NOBMI2-NEXT: movl (%edx), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB61_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %ecx -; X86-NOBMI2-NEXT: orl %esi, %ecx -; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%edx) -; X86-NOBMI2-NEXT: jne .LBB61_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: andl %esi, %eax -; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $15, %ecx +; 
X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btsl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_val: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: andb $15, %al -; X86-BMI2-NEXT: movl $1, %edx -; X86-BMI2-NEXT: shlxl %eax, %edx, %edx -; X86-BMI2-NEXT: movl (%ecx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB61_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %esi -; X86-BMI2-NEXT: orl %edx, %esi -; X86-BMI2-NEXT: lock cmpxchgl %esi, (%ecx) -; X86-BMI2-NEXT: jne .LBB61_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: andl %edx, %eax -; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_val: ; X64-NOBMI2: # %bb.0: # %entry ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: andb $15, %cl -; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: andl $15, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB61_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %ecx -; X64-NOBMI2-NEXT: orl %edx, %ecx -; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB61_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_val: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: andb $15, %sil -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB61_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: orl %ecx, %edx -; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-BMI2-NEXT: jne .LBB61_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: andl $15, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: retq entry: %0 = and i32 %c, 15 @@ -6958,89 +6679,45 @@ define i32 @atomic_shl1_mask0_or_32_gpr_val(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_val: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movl $1, %esi -; X86-NOBMI2-NEXT: shll %cl, %esi -; X86-NOBMI2-NEXT: movl (%edx), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB62_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edi -; 
X86-NOBMI2-NEXT: orl %esi, %edi -; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-NOBMI2-NEXT: jne .LBB62_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btsl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al ; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NOBMI2-NEXT: shll %cl, %edx -; X86-NOBMI2-NEXT: andl %edx, %eax -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_val: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi -; X86-BMI2-NEXT: movl (%edx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB62_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %edi -; X86-BMI2-NEXT: orl %esi, %edi -; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-BMI2-NEXT: jne .LBB62_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: movl $1, %edx -; X86-BMI2-NEXT: shlxl %ecx, %edx, %ecx -; X86-BMI2-NEXT: andl %ecx, %eax -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_val: ; X64-NOBMI2: # %bb.0: # %entry ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: movl $1, %edx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB62_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %esi -; X64-NOBMI2-NEXT: orl %edx, %esi -; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB62_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_val: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB62_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: orl %ecx, %edx -; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-BMI2-NEXT: jne .LBB62_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: movl $1, %ecx -; X64-BMI2-NEXT: shlxl %esi, %ecx, %ecx -; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: retq entry: %0 = and i32 %c, 31 @@ -7054,89 +6731,45 @@ define i32 @atomic_shl1_mask1_or_32_gpr_val(ptr 
%v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_val: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movl $1, %esi -; X86-NOBMI2-NEXT: shll %cl, %esi -; X86-NOBMI2-NEXT: movl (%edx), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB63_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edi -; X86-NOBMI2-NEXT: orl %esi, %edi -; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-NOBMI2-NEXT: jne .LBB63_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: movl $1, %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btsl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al ; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NOBMI2-NEXT: shll %cl, %edx -; X86-NOBMI2-NEXT: andl %edx, %eax -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_val: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi -; X86-BMI2-NEXT: movl (%edx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB63_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %edi -; X86-BMI2-NEXT: orl %esi, %edi -; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-BMI2-NEXT: jne .LBB63_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: movl $1, %edx -; X86-BMI2-NEXT: shlxl %ecx, %edx, %ecx -; X86-BMI2-NEXT: andl %ecx, %eax -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_val: ; X64-NOBMI2: # %bb.0: # %entry ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: movl $1, %edx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB63_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %esi -; X64-NOBMI2-NEXT: orl %edx, %esi -; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB63_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_val: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB63_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; 
X64-BMI2-NEXT: orl %ecx, %edx -; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-BMI2-NEXT: jne .LBB63_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: movl $1, %ecx -; X64-BMI2-NEXT: shlxl %esi, %ecx, %ecx -; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: retq entry: %shl = shl nuw i32 1, %c @@ -7150,76 +6783,45 @@ define i32 @atomic_shl1_mask01_or_32_gpr_val(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_val: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %esi -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl $1, %edx -; X86-NOBMI2-NEXT: shll %cl, %edx -; X86-NOBMI2-NEXT: movl (%esi), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB64_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %ecx -; X86-NOBMI2-NEXT: orl %edx, %ecx -; X86-NOBMI2-NEXT: lock cmpxchgl %ecx, (%esi) -; X86-NOBMI2-NEXT: jne .LBB64_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: andl %edx, %eax -; X86-NOBMI2-NEXT: popl %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: andl $31, %ecx +; X86-NOBMI2-NEXT: xorl %eax, %eax +; X86-NOBMI2-NEXT: lock btsl %ecx, (%edx) +; X86-NOBMI2-NEXT: setb %al +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_val: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movl $1, %edx -; X86-BMI2-NEXT: shlxl %eax, %edx, %edx -; X86-BMI2-NEXT: movl (%ecx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB64_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %esi -; X86-BMI2-NEXT: orl %edx, %esi -; X86-BMI2-NEXT: lock cmpxchgl %esi, (%ecx) -; X86-BMI2-NEXT: jne .LBB64_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: andl %edx, %eax -; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: andl $31, %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: setb %dl +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_val: ; X64-NOBMI2: # %bb.0: # %entry ; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: movl $1, %edx +; X64-NOBMI2-NEXT: andl $31, %ecx +; X64-NOBMI2-NEXT: xorl %eax, %eax +; X64-NOBMI2-NEXT: lock btsl %ecx, (%rdi) +; X64-NOBMI2-NEXT: setb %al ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB64_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %ecx -; X64-NOBMI2-NEXT: orl %edx, %ecx -; X64-NOBMI2-NEXT: lock cmpxchgl %ecx, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB64_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: andl %edx, %eax +; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_val: ; X64-BMI2: # %bb.0: # 
%entry -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB64_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: orl %ecx, %edx -; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-BMI2-NEXT: jne .LBB64_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: andl %ecx, %eax +; X64-BMI2-NEXT: andl $31, %esi +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: setb %al +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: retq entry: %0 = and i32 %c, 31 @@ -8476,106 +8078,59 @@ define i32 @atomic_shl1_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_or_32_gpr_br: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movl $1, %esi -; X86-NOBMI2-NEXT: shll %cl, %esi -; X86-NOBMI2-NEXT: movl (%edx), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB78_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edi -; X86-NOBMI2-NEXT: orl %esi, %edi -; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-NOBMI2-NEXT: jne .LBB78_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: testl %esi, %eax -; X86-NOBMI2-NEXT: je .LBB78_3 -; X86-NOBMI2-NEXT: # %bb.4: # %if.then -; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax -; X86-NOBMI2-NEXT: jmp .LBB78_5 -; X86-NOBMI2-NEXT: .LBB78_3: +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB78_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB78_1: ; X86-NOBMI2-NEXT: movl $123, %eax -; X86-NOBMI2-NEXT: .LBB78_5: # %return -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_or_32_gpr_br: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi -; X86-BMI2-NEXT: movl (%edx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB78_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %edi -; X86-BMI2-NEXT: orl %esi, %edi -; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-BMI2-NEXT: jne .LBB78_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: testl %esi, %eax -; X86-BMI2-NEXT: je .LBB78_3 -; X86-BMI2-NEXT: # %bb.4: # %if.then -; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax -; X86-BMI2-NEXT: jmp .LBB78_5 -; X86-BMI2-NEXT: .LBB78_3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB78_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB78_1: ; X86-BMI2-NEXT: movl $123, %eax -; X86-BMI2-NEXT: .LBB78_5: # %return -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_or_32_gpr_br: 
; X64-NOBMI2: # %bb.0: # %entry -; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: movl $1, %edx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB78_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %esi -; X64-NOBMI2-NEXT: orl %edx, %esi -; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB78_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: testl %edx, %eax -; X64-NOBMI2-NEXT: je .LBB78_3 -; X64-NOBMI2-NEXT: # %bb.4: # %if.then -; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB78_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax ; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-NOBMI2-NEXT: retq -; X64-NOBMI2-NEXT: .LBB78_3: +; X64-NOBMI2-NEXT: .LBB78_1: ; X64-NOBMI2-NEXT: movl $123, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_or_32_gpr_br: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB78_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: orl %ecx, %edx -; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-BMI2-NEXT: jne .LBB78_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: testl %ecx, %eax -; X64-BMI2-NEXT: je .LBB78_3 -; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB78_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then ; X64-BMI2-NEXT: movl %esi, %eax ; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-BMI2-NEXT: retq -; X64-BMI2-NEXT: .LBB78_3: +; X64-BMI2-NEXT: .LBB78_1: ; X64-BMI2-NEXT: movl $123, %eax ; X64-BMI2-NEXT: retq entry: @@ -8599,110 +8154,55 @@ define i32 @atomic_shl1_small_mask_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_br: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI2-NEXT: andl $15, %ecx -; X86-NOBMI2-NEXT: movl $1, %esi -; X86-NOBMI2-NEXT: shll %cl, %esi -; X86-NOBMI2-NEXT: movl (%edx), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB79_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edi -; X86-NOBMI2-NEXT: orl %esi, %edi -; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-NOBMI2-NEXT: jne .LBB79_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: testl %esi, %eax -; X86-NOBMI2-NEXT: je .LBB79_3 -; X86-NOBMI2-NEXT: # %bb.4: # %if.then -; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax -; X86-NOBMI2-NEXT: jmp .LBB79_5 -; X86-NOBMI2-NEXT: .LBB79_3: +; X86-NOBMI2-NEXT: lock btsl %ecx, (%eax) +; X86-NOBMI2-NEXT: jae .LBB79_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB79_1: ; X86-NOBMI2-NEXT: movl $123, %eax -; X86-NOBMI2-NEXT: .LBB79_5: # %return -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi ; X86-NOBMI2-NEXT: retl ; ; 
X86-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_br: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: andl $15, %edx -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %edx, %eax, %esi -; X86-BMI2-NEXT: movl (%ecx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB79_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %edi -; X86-BMI2-NEXT: orl %esi, %edi -; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) -; X86-BMI2-NEXT: jne .LBB79_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: testl %esi, %eax -; X86-BMI2-NEXT: je .LBB79_3 -; X86-BMI2-NEXT: # %bb.4: # %if.then -; X86-BMI2-NEXT: movl (%ecx,%edx,4), %eax -; X86-BMI2-NEXT: jmp .LBB79_5 -; X86-BMI2-NEXT: .LBB79_3: +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: jae .LBB79_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB79_1: ; X86-BMI2-NEXT: movl $123, %eax -; X86-BMI2-NEXT: .LBB79_5: # %return -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_br: ; X64-NOBMI2: # %bb.0: # %entry -; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: andl $15, %ecx -; X64-NOBMI2-NEXT: movl $1, %edx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB79_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %esi -; X64-NOBMI2-NEXT: orl %edx, %esi -; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB79_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: testl %edx, %eax -; X64-NOBMI2-NEXT: je .LBB79_3 -; X64-NOBMI2-NEXT: # %bb.4: # %if.then -; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: lock btsl %esi, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB79_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax ; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-NOBMI2-NEXT: retq -; X64-NOBMI2-NEXT: .LBB79_3: +; X64-NOBMI2-NEXT: .LBB79_1: ; X64-NOBMI2-NEXT: movl $123, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_br: ; X64-BMI2: # %bb.0: # %entry ; X64-BMI2-NEXT: andl $15, %esi -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB79_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: orl %ecx, %edx -; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-BMI2-NEXT: jne .LBB79_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: testl %ecx, %eax -; X64-BMI2-NEXT: je .LBB79_3 -; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: jae .LBB79_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then ; X64-BMI2-NEXT: movl %esi, %eax ; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-BMI2-NEXT: retq -; X64-BMI2-NEXT: .LBB79_3: +; X64-BMI2-NEXT: .LBB79_1: ; X64-BMI2-NEXT: movl $123, %eax ; X64-BMI2-NEXT: retq entry: @@ -8727,106 +8227,59 @@ define i32 @atomic_shl1_mask0_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: 
atomic_shl1_mask0_or_32_gpr_br: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movl $1, %esi -; X86-NOBMI2-NEXT: shll %cl, %esi -; X86-NOBMI2-NEXT: movl (%edx), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB80_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edi -; X86-NOBMI2-NEXT: orl %esi, %edi -; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-NOBMI2-NEXT: jne .LBB80_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: btl %ecx, %eax -; X86-NOBMI2-NEXT: jae .LBB80_3 -; X86-NOBMI2-NEXT: # %bb.4: # %if.then -; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax -; X86-NOBMI2-NEXT: jmp .LBB80_5 -; X86-NOBMI2-NEXT: .LBB80_3: +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB80_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB80_1: ; X86-NOBMI2-NEXT: movl $123, %eax -; X86-NOBMI2-NEXT: .LBB80_5: # %return -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_br: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi -; X86-BMI2-NEXT: movl (%edx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB80_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %edi -; X86-BMI2-NEXT: orl %esi, %edi -; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-BMI2-NEXT: jne .LBB80_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: btl %ecx, %eax -; X86-BMI2-NEXT: jae .LBB80_3 -; X86-BMI2-NEXT: # %bb.4: # %if.then -; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax -; X86-BMI2-NEXT: jmp .LBB80_5 -; X86-BMI2-NEXT: .LBB80_3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB80_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB80_1: ; X86-BMI2-NEXT: movl $123, %eax -; X86-BMI2-NEXT: .LBB80_5: # %return -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_br: ; X64-NOBMI2: # %bb.0: # %entry -; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: movl $1, %edx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB80_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %esi -; X64-NOBMI2-NEXT: orl %edx, %esi -; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB80_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: btl %ecx, %eax -; X64-NOBMI2-NEXT: jae .LBB80_3 -; X64-NOBMI2-NEXT: # %bb.4: # %if.then -; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; 
X64-NOBMI2-NEXT: jae .LBB80_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax ; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-NOBMI2-NEXT: retq -; X64-NOBMI2-NEXT: .LBB80_3: +; X64-NOBMI2-NEXT: .LBB80_1: ; X64-NOBMI2-NEXT: movl $123, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_br: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB80_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: orl %ecx, %edx -; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-BMI2-NEXT: jne .LBB80_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: btl %esi, %eax -; X64-BMI2-NEXT: jae .LBB80_3 -; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB80_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then ; X64-BMI2-NEXT: movl %esi, %eax ; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-BMI2-NEXT: retq -; X64-BMI2-NEXT: .LBB80_3: +; X64-BMI2-NEXT: .LBB80_1: ; X64-BMI2-NEXT: movl $123, %eax ; X64-BMI2-NEXT: retq entry: @@ -8852,106 +8305,59 @@ define i32 @atomic_shl1_mask1_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_br: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movl $1, %esi -; X86-NOBMI2-NEXT: shll %cl, %esi -; X86-NOBMI2-NEXT: movl (%edx), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB81_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edi -; X86-NOBMI2-NEXT: orl %esi, %edi -; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-NOBMI2-NEXT: jne .LBB81_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: btl %ecx, %eax -; X86-NOBMI2-NEXT: jae .LBB81_3 -; X86-NOBMI2-NEXT: # %bb.4: # %if.then -; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax -; X86-NOBMI2-NEXT: jmp .LBB81_5 -; X86-NOBMI2-NEXT: .LBB81_3: +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB81_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB81_1: ; X86-NOBMI2-NEXT: movl $123, %eax -; X86-NOBMI2-NEXT: .LBB81_5: # %return -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_br: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi -; X86-BMI2-NEXT: movl (%edx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB81_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %edi -; X86-BMI2-NEXT: orl %esi, %edi -; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-BMI2-NEXT: jne .LBB81_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: btl %ecx, %eax -; X86-BMI2-NEXT: 
jae .LBB81_3 -; X86-BMI2-NEXT: # %bb.4: # %if.then -; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax -; X86-BMI2-NEXT: jmp .LBB81_5 -; X86-BMI2-NEXT: .LBB81_3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB81_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB81_1: ; X86-BMI2-NEXT: movl $123, %eax -; X86-BMI2-NEXT: .LBB81_5: # %return -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_br: ; X64-NOBMI2: # %bb.0: # %entry -; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: movl $1, %edx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB81_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %esi -; X64-NOBMI2-NEXT: orl %edx, %esi -; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB81_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: btl %ecx, %eax -; X64-NOBMI2-NEXT: jae .LBB81_3 -; X64-NOBMI2-NEXT: # %bb.4: # %if.then -; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB81_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax ; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-NOBMI2-NEXT: retq -; X64-NOBMI2-NEXT: .LBB81_3: +; X64-NOBMI2-NEXT: .LBB81_1: ; X64-NOBMI2-NEXT: movl $123, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_br: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB81_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: orl %ecx, %edx -; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-BMI2-NEXT: jne .LBB81_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: btl %esi, %eax -; X64-BMI2-NEXT: jae .LBB81_3 -; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB81_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then ; X64-BMI2-NEXT: movl %esi, %eax ; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-BMI2-NEXT: retq -; X64-BMI2-NEXT: .LBB81_3: +; X64-BMI2-NEXT: .LBB81_1: ; X64-BMI2-NEXT: movl $123, %eax ; X64-BMI2-NEXT: retq entry: @@ -8977,106 +8383,59 @@ define i32 @atomic_shl1_mask01_or_32_gpr_br(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_br: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movl $1, %esi -; X86-NOBMI2-NEXT: shll %cl, %esi -; X86-NOBMI2-NEXT: movl (%edx), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB82_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edi -; X86-NOBMI2-NEXT: orl %esi, %edi -; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-NOBMI2-NEXT: jne .LBB82_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: 
testl %esi, %eax -; X86-NOBMI2-NEXT: je .LBB82_3 -; X86-NOBMI2-NEXT: # %bb.4: # %if.then -; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax -; X86-NOBMI2-NEXT: jmp .LBB82_5 -; X86-NOBMI2-NEXT: .LBB82_3: +; X86-NOBMI2-NEXT: movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB82_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB82_1: ; X86-NOBMI2-NEXT: movl $123, %eax -; X86-NOBMI2-NEXT: .LBB82_5: # %return -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_br: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi -; X86-BMI2-NEXT: movl (%edx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB82_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %edi -; X86-BMI2-NEXT: orl %esi, %edi -; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-BMI2-NEXT: jne .LBB82_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: testl %esi, %eax -; X86-BMI2-NEXT: je .LBB82_3 -; X86-BMI2-NEXT: # %bb.4: # %if.then -; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax -; X86-BMI2-NEXT: jmp .LBB82_5 -; X86-BMI2-NEXT: .LBB82_3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB82_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB82_1: ; X86-BMI2-NEXT: movl $123, %eax -; X86-BMI2-NEXT: .LBB82_5: # %return -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_br: ; X64-NOBMI2: # %bb.0: # %entry -; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: movl $1, %edx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB82_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %esi -; X64-NOBMI2-NEXT: orl %edx, %esi -; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB82_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: testl %edx, %eax -; X64-NOBMI2-NEXT: je .LBB82_3 -; X64-NOBMI2-NEXT: # %bb.4: # %if.then -; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB82_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax ; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-NOBMI2-NEXT: retq -; X64-NOBMI2-NEXT: .LBB82_3: +; X64-NOBMI2-NEXT: .LBB82_1: ; X64-NOBMI2-NEXT: movl $123, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_br: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB82_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: orl %ecx, %edx -; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; 
X64-BMI2-NEXT: jne .LBB82_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: testl %ecx, %eax -; X64-BMI2-NEXT: je .LBB82_3 -; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB82_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then ; X64-BMI2-NEXT: movl %esi, %eax ; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-BMI2-NEXT: retq -; X64-BMI2-NEXT: .LBB82_3: +; X64-BMI2-NEXT: .LBB82_1: ; X64-BMI2-NEXT: movl $123, %eax ; X64-BMI2-NEXT: retq entry: @@ -9224,109 +8583,60 @@ define i32 @atomic_shl1_or_32_gpr_brz(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_or_32_gpr_brz: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI2-NEXT: movl $1, %edi -; X86-NOBMI2-NEXT: shll %cl, %edi -; X86-NOBMI2-NEXT: movl (%esi), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB84_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edx -; X86-NOBMI2-NEXT: orl %edi, %edx -; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%esi) -; X86-NOBMI2-NEXT: jne .LBB84_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: movl $123, %edx -; X86-NOBMI2-NEXT: testl %edi, %eax -; X86-NOBMI2-NEXT: jne .LBB84_4 -; X86-NOBMI2-NEXT: # %bb.3: # %if.then -; X86-NOBMI2-NEXT: movl (%esi,%ecx,4), %edx -; X86-NOBMI2-NEXT: .LBB84_4: # %return -; X86-NOBMI2-NEXT: movl %edx, %eax -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btsl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB84_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB84_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_or_32_gpr_brz: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %edx, %eax, %edi -; X86-BMI2-NEXT: movl (%esi), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB84_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %ecx -; X86-BMI2-NEXT: orl %edi, %ecx -; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%esi) -; X86-BMI2-NEXT: jne .LBB84_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: movl $123, %ecx -; X86-BMI2-NEXT: testl %edi, %eax -; X86-BMI2-NEXT: jne .LBB84_4 -; X86-BMI2-NEXT: # %bb.3: # %if.then -; X86-BMI2-NEXT: movl (%esi,%edx,4), %ecx -; X86-BMI2-NEXT: .LBB84_4: # %return ; X86-BMI2-NEXT: movl %ecx, %eax -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btsl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB84_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB84_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_or_32_gpr_brz: ; X64-NOBMI2: # %bb.0: # %entry -; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: movl $1, %esi -; 
X64-NOBMI2-NEXT: shll %cl, %esi -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB84_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %edx -; X64-NOBMI2-NEXT: orl %esi, %edx -; X64-NOBMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB84_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: movl $123, %edx -; X64-NOBMI2-NEXT: testl %esi, %eax -; X64-NOBMI2-NEXT: je .LBB84_3 -; X64-NOBMI2-NEXT: # %bb.4: # %return -; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB84_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return ; X64-NOBMI2-NEXT: retq -; X64-NOBMI2-NEXT: .LBB84_3: # %if.then -; X64-NOBMI2-NEXT: movl %ecx, %eax -; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %edx -; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: .LBB84_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_or_32_gpr_brz: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %edx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB84_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %ecx -; X64-BMI2-NEXT: orl %edx, %ecx -; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) -; X64-BMI2-NEXT: jne .LBB84_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: movl $123, %ecx -; X64-BMI2-NEXT: testl %edx, %eax -; X64-BMI2-NEXT: je .LBB84_3 -; X64-BMI2-NEXT: # %bb.4: # %return -; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB84_1 +; X64-BMI2-NEXT: # %bb.2: # %return ; X64-BMI2-NEXT: retq -; X64-BMI2-NEXT: .LBB84_3: # %if.then +; X64-BMI2-NEXT: .LBB84_1: # %if.then ; X64-BMI2-NEXT: movl %esi, %eax -; X64-BMI2-NEXT: movl (%rdi,%rax,4), %ecx -; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-BMI2-NEXT: retq entry: %shl = shl nuw i32 1, %c @@ -9349,113 +8659,56 @@ define i32 @atomic_shl1_small_mask_or_32_gpr_brz(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brz: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: andl $15, %ecx -; X86-NOBMI2-NEXT: movl $1, %edi -; X86-NOBMI2-NEXT: shll %cl, %edi -; X86-NOBMI2-NEXT: movl (%esi), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB85_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edx -; X86-NOBMI2-NEXT: orl %edi, %edx -; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%esi) -; X86-NOBMI2-NEXT: jne .LBB85_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: movl $123, %edx -; X86-NOBMI2-NEXT: testl %edi, %eax -; X86-NOBMI2-NEXT: jne .LBB85_4 -; X86-NOBMI2-NEXT: # %bb.3: # %if.then -; X86-NOBMI2-NEXT: movl (%esi,%ecx,4), %edx -; X86-NOBMI2-NEXT: .LBB85_4: # %return -; X86-NOBMI2-NEXT: movl %edx, %eax -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; 
X86-NOBMI2-NEXT: andl $15, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB85_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB85_1: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%edx,4), %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brz: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: andl $15, %esi -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %esi, %eax, %edi -; X86-BMI2-NEXT: movl (%edx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB85_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %ecx -; X86-BMI2-NEXT: orl %edi, %ecx -; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%edx) -; X86-BMI2-NEXT: jne .LBB85_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: movl $123, %ecx -; X86-BMI2-NEXT: testl %edi, %eax -; X86-BMI2-NEXT: jne .LBB85_4 -; X86-BMI2-NEXT: # %bb.3: # %if.then -; X86-BMI2-NEXT: movl (%edx,%esi,4), %ecx -; X86-BMI2-NEXT: .LBB85_4: # %return -; X86-BMI2-NEXT: movl %ecx, %eax -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: andl $15, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB85_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB85_1: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%edx,4), %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brz: ; X64-NOBMI2: # %bb.0: # %entry -; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: andl $15, %ecx -; X64-NOBMI2-NEXT: movl $1, %esi -; X64-NOBMI2-NEXT: shll %cl, %esi -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB85_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %edx -; X64-NOBMI2-NEXT: orl %esi, %edx -; X64-NOBMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB85_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: movl $123, %edx -; X64-NOBMI2-NEXT: testl %esi, %eax -; X64-NOBMI2-NEXT: je .LBB85_3 -; X64-NOBMI2-NEXT: # %bb.4: # %return -; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: andl $15, %esi +; X64-NOBMI2-NEXT: lock btsl %esi, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB85_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return ; X64-NOBMI2-NEXT: retq -; X64-NOBMI2-NEXT: .LBB85_3: # %if.then -; X64-NOBMI2-NEXT: movl %ecx, %eax -; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %edx -; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: .LBB85_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brz: ; X64-BMI2: # %bb.0: # %entry ; X64-BMI2-NEXT: andl $15, %esi -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %edx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB85_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %ecx -; X64-BMI2-NEXT: orl %edx, %ecx -; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) -; X64-BMI2-NEXT: jne .LBB85_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: 
movl $123, %ecx -; X64-BMI2-NEXT: testl %edx, %eax -; X64-BMI2-NEXT: je .LBB85_3 -; X64-BMI2-NEXT: # %bb.4: # %return -; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: lock btsl %esi, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB85_1 +; X64-BMI2-NEXT: # %bb.2: # %return ; X64-BMI2-NEXT: retq -; X64-BMI2-NEXT: .LBB85_3: # %if.then +; X64-BMI2-NEXT: .LBB85_1: # %if.then ; X64-BMI2-NEXT: movl %esi, %eax -; X64-BMI2-NEXT: movl (%rdi,%rax,4), %ecx -; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-BMI2-NEXT: retq entry: %0 = and i32 %c, 15 @@ -9479,109 +8732,60 @@ define i32 @atomic_shl1_mask0_or_32_gpr_brz(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brz: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI2-NEXT: movl $1, %edx -; X86-NOBMI2-NEXT: shll %cl, %edx -; X86-NOBMI2-NEXT: movl (%esi), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB86_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edi -; X86-NOBMI2-NEXT: orl %edx, %edi -; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%esi) -; X86-NOBMI2-NEXT: jne .LBB86_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: movl $123, %edx -; X86-NOBMI2-NEXT: btl %ecx, %eax -; X86-NOBMI2-NEXT: jb .LBB86_4 -; X86-NOBMI2-NEXT: # %bb.3: # %if.then -; X86-NOBMI2-NEXT: movl (%esi,%ecx,4), %edx -; X86-NOBMI2-NEXT: .LBB86_4: # %return -; X86-NOBMI2-NEXT: movl %edx, %eax -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btsl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB86_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB86_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brz: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %edx, %eax, %ecx -; X86-BMI2-NEXT: movl (%esi), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB86_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %edi -; X86-BMI2-NEXT: orl %ecx, %edi -; X86-BMI2-NEXT: lock cmpxchgl %edi, (%esi) -; X86-BMI2-NEXT: jne .LBB86_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: movl $123, %ecx -; X86-BMI2-NEXT: btl %edx, %eax -; X86-BMI2-NEXT: jb .LBB86_4 -; X86-BMI2-NEXT: # %bb.3: # %if.then -; X86-BMI2-NEXT: movl (%esi,%edx,4), %ecx -; X86-BMI2-NEXT: .LBB86_4: # %return ; X86-BMI2-NEXT: movl %ecx, %eax -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btsl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB86_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB86_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brz: ; X64-NOBMI2: # %bb.0: # %entry -; X64-NOBMI2-NEXT: movl %esi, %ecx -; 
X64-NOBMI2-NEXT: movl $1, %edx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB86_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %esi -; X64-NOBMI2-NEXT: orl %edx, %esi -; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB86_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: movl $123, %edx -; X64-NOBMI2-NEXT: btl %ecx, %eax -; X64-NOBMI2-NEXT: jae .LBB86_3 -; X64-NOBMI2-NEXT: # %bb.4: # %return -; X64-NOBMI2-NEXT: movl %edx, %eax -; X64-NOBMI2-NEXT: retq -; X64-NOBMI2-NEXT: .LBB86_3: # %if.then -; X64-NOBMI2-NEXT: movl %ecx, %eax -; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %edx -; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB86_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return ; X64-NOBMI2-NEXT: retq -; -; X64-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brz: -; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB86_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: orl %ecx, %edx -; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-BMI2-NEXT: jne .LBB86_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: movl $123, %ecx -; X64-BMI2-NEXT: btl %esi, %eax -; X64-BMI2-NEXT: jae .LBB86_3 -; X64-BMI2-NEXT: # %bb.4: # %return -; X64-BMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: .LBB86_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax +; X64-NOBMI2-NEXT: retq +; +; X64-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brz: +; X64-BMI2: # %bb.0: # %entry +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB86_1 +; X64-BMI2-NEXT: # %bb.2: # %return ; X64-BMI2-NEXT: retq -; X64-BMI2-NEXT: .LBB86_3: # %if.then +; X64-BMI2-NEXT: .LBB86_1: # %if.then ; X64-BMI2-NEXT: movl %esi, %eax -; X64-BMI2-NEXT: movl (%rdi,%rax,4), %ecx -; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-BMI2-NEXT: retq entry: %rem = and i32 %c, 31 @@ -9606,109 +8810,60 @@ define i32 @atomic_shl1_mask1_or_32_gpr_brz(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brz: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI2-NEXT: movl $1, %edx -; X86-NOBMI2-NEXT: shll %cl, %edx -; X86-NOBMI2-NEXT: movl (%esi), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB87_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edi -; X86-NOBMI2-NEXT: orl %edx, %edi -; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%esi) -; X86-NOBMI2-NEXT: jne .LBB87_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: movl $123, %edx -; X86-NOBMI2-NEXT: btl %ecx, %eax -; X86-NOBMI2-NEXT: jb .LBB87_4 -; X86-NOBMI2-NEXT: # %bb.3: # %if.then -; X86-NOBMI2-NEXT: movl (%esi,%ecx,4), %edx -; X86-NOBMI2-NEXT: .LBB87_4: # %return -; X86-NOBMI2-NEXT: movl %edx, %eax -; X86-NOBMI2-NEXT: 
popl %esi -; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btsl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB87_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB87_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brz: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %edx, %eax, %ecx -; X86-BMI2-NEXT: movl (%esi), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB87_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %edi -; X86-BMI2-NEXT: orl %ecx, %edi -; X86-BMI2-NEXT: lock cmpxchgl %edi, (%esi) -; X86-BMI2-NEXT: jne .LBB87_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: movl $123, %ecx -; X86-BMI2-NEXT: btl %edx, %eax -; X86-BMI2-NEXT: jb .LBB87_4 -; X86-BMI2-NEXT: # %bb.3: # %if.then -; X86-BMI2-NEXT: movl (%esi,%edx,4), %ecx -; X86-BMI2-NEXT: .LBB87_4: # %return ; X86-BMI2-NEXT: movl %ecx, %eax -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btsl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB87_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB87_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brz: ; X64-NOBMI2: # %bb.0: # %entry -; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: movl $1, %edx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB87_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %esi -; X64-NOBMI2-NEXT: orl %edx, %esi -; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB87_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: movl $123, %edx -; X64-NOBMI2-NEXT: btl %ecx, %eax -; X64-NOBMI2-NEXT: jae .LBB87_3 -; X64-NOBMI2-NEXT: # %bb.4: # %return -; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB87_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return ; X64-NOBMI2-NEXT: retq -; X64-NOBMI2-NEXT: .LBB87_3: # %if.then -; X64-NOBMI2-NEXT: movl %ecx, %eax -; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %edx -; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: .LBB87_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brz: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB87_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: orl %ecx, %edx -; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-BMI2-NEXT: jne .LBB87_1 -; X64-BMI2-NEXT: # 
%bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: movl $123, %ecx -; X64-BMI2-NEXT: btl %esi, %eax -; X64-BMI2-NEXT: jae .LBB87_3 -; X64-BMI2-NEXT: # %bb.4: # %return -; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB87_1 +; X64-BMI2-NEXT: # %bb.2: # %return ; X64-BMI2-NEXT: retq -; X64-BMI2-NEXT: .LBB87_3: # %if.then +; X64-BMI2-NEXT: .LBB87_1: # %if.then ; X64-BMI2-NEXT: movl %esi, %eax -; X64-BMI2-NEXT: movl (%rdi,%rax,4), %ecx -; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-BMI2-NEXT: retq entry: %shl = shl nuw i32 1, %c @@ -9733,109 +8888,60 @@ define i32 @atomic_shl1_mask01_or_32_gpr_brz(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brz: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI2-NEXT: movl $1, %edi -; X86-NOBMI2-NEXT: shll %cl, %edi -; X86-NOBMI2-NEXT: movl (%esi), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB88_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edx -; X86-NOBMI2-NEXT: orl %edi, %edx -; X86-NOBMI2-NEXT: lock cmpxchgl %edx, (%esi) -; X86-NOBMI2-NEXT: jne .LBB88_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: movl $123, %edx -; X86-NOBMI2-NEXT: testl %edi, %eax -; X86-NOBMI2-NEXT: jne .LBB88_4 -; X86-NOBMI2-NEXT: # %bb.3: # %if.then -; X86-NOBMI2-NEXT: movl (%esi,%ecx,4), %edx -; X86-NOBMI2-NEXT: .LBB88_4: # %return -; X86-NOBMI2-NEXT: movl %edx, %eax -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl %ecx, %eax +; X86-NOBMI2-NEXT: andl $31, %eax +; X86-NOBMI2-NEXT: lock btsl %eax, (%edx) +; X86-NOBMI2-NEXT: movl $123, %eax +; X86-NOBMI2-NEXT: jae .LBB88_1 +; X86-NOBMI2-NEXT: # %bb.2: # %return +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB88_1: # %if.then +; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brz: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %edx, %eax, %edi -; X86-BMI2-NEXT: movl (%esi), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB88_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %ecx -; X86-BMI2-NEXT: orl %edi, %ecx -; X86-BMI2-NEXT: lock cmpxchgl %ecx, (%esi) -; X86-BMI2-NEXT: jne .LBB88_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: movl $123, %ecx -; X86-BMI2-NEXT: testl %edi, %eax -; X86-BMI2-NEXT: jne .LBB88_4 -; X86-BMI2-NEXT: # %bb.3: # %if.then -; X86-BMI2-NEXT: movl (%esi,%edx,4), %ecx -; X86-BMI2-NEXT: .LBB88_4: # %return ; X86-BMI2-NEXT: movl %ecx, %eax -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: andl $31, %eax +; X86-BMI2-NEXT: lock btsl %eax, (%edx) +; X86-BMI2-NEXT: movl $123, %eax +; X86-BMI2-NEXT: jae .LBB88_1 +; X86-BMI2-NEXT: # %bb.2: # %return +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB88_1: # %if.then +; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax ; X86-BMI2-NEXT: retl ; ; 
X64-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brz: ; X64-NOBMI2: # %bb.0: # %entry -; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: movl $1, %esi -; X64-NOBMI2-NEXT: shll %cl, %esi -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB88_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %edx -; X64-NOBMI2-NEXT: orl %esi, %edx -; X64-NOBMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB88_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: movl $123, %edx -; X64-NOBMI2-NEXT: testl %esi, %eax -; X64-NOBMI2-NEXT: je .LBB88_3 -; X64-NOBMI2-NEXT: # %bb.4: # %return -; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: movl $123, %eax +; X64-NOBMI2-NEXT: jae .LBB88_1 +; X64-NOBMI2-NEXT: # %bb.2: # %return ; X64-NOBMI2-NEXT: retq -; X64-NOBMI2-NEXT: .LBB88_3: # %if.then -; X64-NOBMI2-NEXT: movl %ecx, %eax -; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %edx -; X64-NOBMI2-NEXT: movl %edx, %eax +; X64-NOBMI2-NEXT: .LBB88_1: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brz: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %edx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB88_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %ecx -; X64-BMI2-NEXT: orl %edx, %ecx -; X64-BMI2-NEXT: lock cmpxchgl %ecx, (%rdi) -; X64-BMI2-NEXT: jne .LBB88_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: movl $123, %ecx -; X64-BMI2-NEXT: testl %edx, %eax -; X64-BMI2-NEXT: je .LBB88_3 -; X64-BMI2-NEXT: # %bb.4: # %return -; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax +; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: movl $123, %eax +; X64-BMI2-NEXT: jae .LBB88_1 +; X64-BMI2-NEXT: # %bb.2: # %return ; X64-BMI2-NEXT: retq -; X64-BMI2-NEXT: .LBB88_3: # %if.then +; X64-BMI2-NEXT: .LBB88_1: # %if.then ; X64-BMI2-NEXT: movl %esi, %eax -; X64-BMI2-NEXT: movl (%rdi,%rax,4), %ecx -; X64-BMI2-NEXT: movl %ecx, %eax +; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-BMI2-NEXT: retq entry: %rem = and i32 %c, 31 @@ -9984,106 +9090,59 @@ define i32 @atomic_shl1_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_or_32_gpr_brnz: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movl $1, %esi -; X86-NOBMI2-NEXT: shll %cl, %esi -; X86-NOBMI2-NEXT: movl (%edx), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB90_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edi -; X86-NOBMI2-NEXT: orl %esi, %edi -; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-NOBMI2-NEXT: jne .LBB90_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: testl %esi, %eax -; X86-NOBMI2-NEXT: je .LBB90_3 -; X86-NOBMI2-NEXT: # %bb.4: # %if.then -; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax -; X86-NOBMI2-NEXT: jmp .LBB90_5 -; X86-NOBMI2-NEXT: .LBB90_3: +; X86-NOBMI2-NEXT: 
movl %eax, %edx +; X86-NOBMI2-NEXT: andl $31, %edx +; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx) +; X86-NOBMI2-NEXT: jae .LBB90_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB90_1: ; X86-NOBMI2-NEXT: movl $123, %eax -; X86-NOBMI2-NEXT: .LBB90_5: # %return -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_or_32_gpr_brnz: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi -; X86-BMI2-NEXT: movl (%edx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB90_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %edi -; X86-BMI2-NEXT: orl %esi, %edi -; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-BMI2-NEXT: jne .LBB90_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: testl %esi, %eax -; X86-BMI2-NEXT: je .LBB90_3 -; X86-BMI2-NEXT: # %bb.4: # %if.then -; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax -; X86-BMI2-NEXT: jmp .LBB90_5 -; X86-BMI2-NEXT: .LBB90_3: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: andl $31, %edx +; X86-BMI2-NEXT: lock btsl %edx, (%ecx) +; X86-BMI2-NEXT: jae .LBB90_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB90_1: ; X86-BMI2-NEXT: movl $123, %eax -; X86-BMI2-NEXT: .LBB90_5: # %return -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_or_32_gpr_brnz: ; X64-NOBMI2: # %bb.0: # %entry -; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: movl $1, %edx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB90_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NOBMI2-NEXT: movl %eax, %esi -; X64-NOBMI2-NEXT: orl %edx, %esi -; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi) -; X64-NOBMI2-NEXT: jne .LBB90_1 -; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-NOBMI2-NEXT: testl %edx, %eax -; X64-NOBMI2-NEXT: je .LBB90_3 -; X64-NOBMI2-NEXT: # %bb.4: # %if.then -; X64-NOBMI2-NEXT: movl %ecx, %eax +; X64-NOBMI2-NEXT: movl %esi, %eax +; X64-NOBMI2-NEXT: andl $31, %eax +; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi) +; X64-NOBMI2-NEXT: jae .LBB90_1 +; X64-NOBMI2-NEXT: # %bb.2: # %if.then +; X64-NOBMI2-NEXT: movl %esi, %eax ; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-NOBMI2-NEXT: retq -; X64-NOBMI2-NEXT: .LBB90_3: +; X64-NOBMI2-NEXT: .LBB90_1: ; X64-NOBMI2-NEXT: movl $123, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: atomic_shl1_or_32_gpr_brnz: ; X64-BMI2: # %bb.0: # %entry -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx -; X64-BMI2-NEXT: movl (%rdi), %eax -; X64-BMI2-NEXT: .p2align 4, 0x90 -; X64-BMI2-NEXT: .LBB90_1: # %atomicrmw.start -; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-BMI2-NEXT: movl %eax, %edx -; X64-BMI2-NEXT: orl %ecx, %edx -; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi) -; X64-BMI2-NEXT: jne .LBB90_1 -; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X64-BMI2-NEXT: testl %ecx, %eax -; X64-BMI2-NEXT: je .LBB90_3 -; X64-BMI2-NEXT: # %bb.4: # %if.then +; X64-BMI2-NEXT: movl %esi, %eax +; X64-BMI2-NEXT: andl $31, %eax 
+; X64-BMI2-NEXT: lock btsl %eax, (%rdi) +; X64-BMI2-NEXT: jae .LBB90_1 +; X64-BMI2-NEXT: # %bb.2: # %if.then ; X64-BMI2-NEXT: movl %esi, %eax ; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax ; X64-BMI2-NEXT: retq -; X64-BMI2-NEXT: .LBB90_3: +; X64-BMI2-NEXT: .LBB90_1: ; X64-BMI2-NEXT: movl $123, %eax ; X64-BMI2-NEXT: retq entry: @@ -10107,110 +9166,55 @@ define i32 @atomic_shl1_small_mask_or_32_gpr_brnz(ptr %v, i32 %c) nounwind { ; X86-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz: ; X86-NOBMI2: # %bb.0: # %entry -; X86-NOBMI2-NEXT: pushl %edi -; X86-NOBMI2-NEXT: pushl %esi -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI2-NEXT: andl $15, %ecx -; X86-NOBMI2-NEXT: movl $1, %esi -; X86-NOBMI2-NEXT: shll %cl, %esi -; X86-NOBMI2-NEXT: movl (%edx), %eax -; X86-NOBMI2-NEXT: .p2align 4, 0x90 -; X86-NOBMI2-NEXT: .LBB91_1: # %atomicrmw.start -; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NOBMI2-NEXT: movl %eax, %edi -; X86-NOBMI2-NEXT: orl %esi, %edi -; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx) -; X86-NOBMI2-NEXT: jne .LBB91_1 -; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-NOBMI2-NEXT: testl %esi, %eax -; X86-NOBMI2-NEXT: je .LBB91_3 -; X86-NOBMI2-NEXT: # %bb.4: # %if.then -; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax -; X86-NOBMI2-NEXT: jmp .LBB91_5 -; X86-NOBMI2-NEXT: .LBB91_3: +; X86-NOBMI2-NEXT: lock btsl %ecx, (%eax) +; X86-NOBMI2-NEXT: jae .LBB91_1 +; X86-NOBMI2-NEXT: # %bb.2: # %if.then +; X86-NOBMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-NOBMI2-NEXT: retl +; X86-NOBMI2-NEXT: .LBB91_1: ; X86-NOBMI2-NEXT: movl $123, %eax -; X86-NOBMI2-NEXT: .LBB91_5: # %return -; X86-NOBMI2-NEXT: popl %esi -; X86-NOBMI2-NEXT: popl %edi ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz: ; X86-BMI2: # %bb.0: # %entry -; X86-BMI2-NEXT: pushl %edi -; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: andl $15, %edx -; X86-BMI2-NEXT: movl $1, %eax -; X86-BMI2-NEXT: shlxl %edx, %eax, %esi -; X86-BMI2-NEXT: movl (%ecx), %eax -; X86-BMI2-NEXT: .p2align 4, 0x90 -; X86-BMI2-NEXT: .LBB91_1: # %atomicrmw.start -; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-BMI2-NEXT: movl %eax, %edi -; X86-BMI2-NEXT: orl %esi, %edi -; X86-BMI2-NEXT: lock cmpxchgl %edi, (%ecx) -; X86-BMI2-NEXT: jne .LBB91_1 -; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end -; X86-BMI2-NEXT: testl %esi, %eax -; X86-BMI2-NEXT: je .LBB91_3 -; X86-BMI2-NEXT: # %bb.4: # %if.then -; X86-BMI2-NEXT: movl (%ecx,%edx,4), %eax -; X86-BMI2-NEXT: jmp .LBB91_5 -; X86-BMI2-NEXT: .LBB91_3: +; X86-BMI2-NEXT: andl $15, %ecx +; X86-BMI2-NEXT: lock btsl %ecx, (%eax) +; X86-BMI2-NEXT: jae .LBB91_1 +; X86-BMI2-NEXT: # %bb.2: # %if.then +; X86-BMI2-NEXT: movl (%eax,%ecx,4), %eax +; X86-BMI2-NEXT: retl +; X86-BMI2-NEXT: .LBB91_1: ; X86-BMI2-NEXT: movl $123, %eax -; X86-BMI2-NEXT: .LBB91_5: # %return -; X86-BMI2-NEXT: popl %esi -; X86-BMI2-NEXT: popl %edi ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz: ; X64-NOBMI2: # %bb.0: # %entry -; X64-NOBMI2-NEXT: movl %esi, %ecx -; X64-NOBMI2-NEXT: andl $15, %ecx -; X64-NOBMI2-NEXT: movl $1, %edx -; X64-NOBMI2-NEXT: shll %cl, %edx -; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: .p2align 4, 0x90 -; X64-NOBMI2-NEXT: .LBB91_1: # %atomicrmw.start -; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1 -; 
X64-NOBMI2-NEXT: movl %eax, %esi
-; X64-NOBMI2-NEXT: orl %edx, %esi
-; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB91_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: testl %edx, %eax
-; X64-NOBMI2-NEXT: je .LBB91_3
-; X64-NOBMI2-NEXT: # %bb.4: # %if.then
-; X64-NOBMI2-NEXT: movl %ecx, %eax
+; X64-NOBMI2-NEXT: andl $15, %esi
+; X64-NOBMI2-NEXT: lock btsl %esi, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB91_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movl %esi, %eax
; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax
; X64-NOBMI2-NEXT: retq
-; X64-NOBMI2-NEXT: .LBB91_3:
+; X64-NOBMI2-NEXT: .LBB91_1:
; X64-NOBMI2-NEXT: movl $123, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_small_mask_or_32_gpr_brnz:
; X64-BMI2: # %bb.0: # %entry
; X64-BMI2-NEXT: andl $15, %esi
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx
-; X64-BMI2-NEXT: movl (%rdi), %eax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB91_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movl %eax, %edx
-; X64-BMI2-NEXT: orl %ecx, %edx
-; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi)
-; X64-BMI2-NEXT: jne .LBB91_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: testl %ecx, %eax
-; X64-BMI2-NEXT: je .LBB91_3
-; X64-BMI2-NEXT: # %bb.4: # %if.then
+; X64-BMI2-NEXT: lock btsl %esi, (%rdi)
+; X64-BMI2-NEXT: jae .LBB91_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
; X64-BMI2-NEXT: movl %esi, %eax
; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax
; X64-BMI2-NEXT: retq
-; X64-BMI2-NEXT: .LBB91_3:
+; X64-BMI2-NEXT: .LBB91_1:
; X64-BMI2-NEXT: movl $123, %eax
; X64-BMI2-NEXT: retq
entry:
@@ -10235,106 +9239,59 @@ define i32 @atomic_shl1_mask0_or_32_gpr_brnz(ptr %v, i32 %c) nounwind {
; X86-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brnz:
; X86-NOBMI2: # %bb.0: # %entry
-; X86-NOBMI2-NEXT: pushl %edi
-; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI2-NEXT: movl $1, %esi
-; X86-NOBMI2-NEXT: shll %cl, %esi
-; X86-NOBMI2-NEXT: movl (%edx), %eax
-; X86-NOBMI2-NEXT: .p2align 4, 0x90
-; X86-NOBMI2-NEXT: .LBB92_1: # %atomicrmw.start
-; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOBMI2-NEXT: movl %eax, %edi
-; X86-NOBMI2-NEXT: orl %esi, %edi
-; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NOBMI2-NEXT: jne .LBB92_1
-; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NOBMI2-NEXT: btl %ecx, %eax
-; X86-NOBMI2-NEXT: jae .LBB92_3
-; X86-NOBMI2-NEXT: # %bb.4: # %if.then
-; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax
-; X86-NOBMI2-NEXT: jmp .LBB92_5
-; X86-NOBMI2-NEXT: .LBB92_3:
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: andl $31, %edx
+; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx)
+; X86-NOBMI2-NEXT: jae .LBB92_1
+; X86-NOBMI2-NEXT: # %bb.2: # %if.then
+; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax
+; X86-NOBMI2-NEXT: retl
+; X86-NOBMI2-NEXT: .LBB92_1:
; X86-NOBMI2-NEXT: movl $123, %eax
-; X86-NOBMI2-NEXT: .LBB92_5: # %return
-; X86-NOBMI2-NEXT: popl %esi
-; X86-NOBMI2-NEXT: popl %edi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brnz:
; X86-BMI2: # %bb.0: # %entry
-; X86-BMI2-NEXT: pushl %edi
-; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-NEXT: movl $1, %eax
-; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi
-; X86-BMI2-NEXT: movl (%edx), %eax
-; X86-BMI2-NEXT: .p2align 4, 0x90
-; X86-BMI2-NEXT: .LBB92_1: # %atomicrmw.start
-; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-BMI2-NEXT: movl %eax, %edi
-; X86-BMI2-NEXT: orl %esi, %edi
-; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-BMI2-NEXT: jne .LBB92_1
-; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X86-BMI2-NEXT: btl %ecx, %eax
-; X86-BMI2-NEXT: jae .LBB92_3
-; X86-BMI2-NEXT: # %bb.4: # %if.then
-; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax
-; X86-BMI2-NEXT: jmp .LBB92_5
-; X86-BMI2-NEXT: .LBB92_3:
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: andl $31, %edx
+; X86-BMI2-NEXT: lock btsl %edx, (%ecx)
+; X86-BMI2-NEXT: jae .LBB92_1
+; X86-BMI2-NEXT: # %bb.2: # %if.then
+; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax
+; X86-BMI2-NEXT: retl
+; X86-BMI2-NEXT: .LBB92_1:
; X86-BMI2-NEXT: movl $123, %eax
-; X86-BMI2-NEXT: .LBB92_5: # %return
-; X86-BMI2-NEXT: popl %esi
-; X86-BMI2-NEXT: popl %edi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brnz:
; X64-NOBMI2: # %bb.0: # %entry
-; X64-NOBMI2-NEXT: movl %esi, %ecx
-; X64-NOBMI2-NEXT: movl $1, %edx
-; X64-NOBMI2-NEXT: shll %cl, %edx
-; X64-NOBMI2-NEXT: movl (%rdi), %eax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB92_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movl %eax, %esi
-; X64-NOBMI2-NEXT: orl %edx, %esi
-; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB92_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: btl %ecx, %eax
-; X64-NOBMI2-NEXT: jae .LBB92_3
-; X64-NOBMI2-NEXT: # %bb.4: # %if.then
-; X64-NOBMI2-NEXT: movl %ecx, %eax
+; X64-NOBMI2-NEXT: movl %esi, %eax
+; X64-NOBMI2-NEXT: andl $31, %eax
+; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB92_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movl %esi, %eax
; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax
; X64-NOBMI2-NEXT: retq
-; X64-NOBMI2-NEXT: .LBB92_3:
+; X64-NOBMI2-NEXT: .LBB92_1:
; X64-NOBMI2-NEXT: movl $123, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_mask0_or_32_gpr_brnz:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx
-; X64-BMI2-NEXT: movl (%rdi), %eax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB92_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movl %eax, %edx
-; X64-BMI2-NEXT: orl %ecx, %edx
-; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi)
-; X64-BMI2-NEXT: jne .LBB92_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: btl %esi, %eax
-; X64-BMI2-NEXT: jae .LBB92_3
-; X64-BMI2-NEXT: # %bb.4: # %if.then
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andl $31, %eax
+; X64-BMI2-NEXT: lock btsl %eax, (%rdi)
+; X64-BMI2-NEXT: jae .LBB92_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
; X64-BMI2-NEXT: movl %esi, %eax
; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax
; X64-BMI2-NEXT: retq
-; X64-BMI2-NEXT: .LBB92_3:
+; X64-BMI2-NEXT: .LBB92_1:
; X64-BMI2-NEXT: movl $123, %eax
; X64-BMI2-NEXT: retq
entry:
@@ -10360,106 +9317,59 @@ define i32 @atomic_shl1_mask1_or_32_gpr_brnz(ptr %v, i32 %c) nounwind {
; X86-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brnz:
; X86-NOBMI2: # %bb.0: # %entry
-; X86-NOBMI2-NEXT: pushl %edi
-; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI2-NEXT: movl $1, %esi
-; X86-NOBMI2-NEXT: shll %cl, %esi
-; X86-NOBMI2-NEXT: movl (%edx), %eax
-; X86-NOBMI2-NEXT: .p2align 4, 0x90
-; X86-NOBMI2-NEXT: .LBB93_1: # %atomicrmw.start
-; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOBMI2-NEXT: movl %eax, %edi
-; X86-NOBMI2-NEXT: orl %esi, %edi
-; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NOBMI2-NEXT: jne .LBB93_1
-; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NOBMI2-NEXT: btl %ecx, %eax
-; X86-NOBMI2-NEXT: jae .LBB93_3
-; X86-NOBMI2-NEXT: # %bb.4: # %if.then
-; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax
-; X86-NOBMI2-NEXT: jmp .LBB93_5
-; X86-NOBMI2-NEXT: .LBB93_3:
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: andl $31, %edx
+; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx)
+; X86-NOBMI2-NEXT: jae .LBB93_1
+; X86-NOBMI2-NEXT: # %bb.2: # %if.then
+; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax
+; X86-NOBMI2-NEXT: retl
+; X86-NOBMI2-NEXT: .LBB93_1:
; X86-NOBMI2-NEXT: movl $123, %eax
-; X86-NOBMI2-NEXT: .LBB93_5: # %return
-; X86-NOBMI2-NEXT: popl %esi
-; X86-NOBMI2-NEXT: popl %edi
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brnz:
; X86-BMI2: # %bb.0: # %entry
-; X86-BMI2-NEXT: pushl %edi
-; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-NEXT: movl $1, %eax
-; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi
-; X86-BMI2-NEXT: movl (%edx), %eax
-; X86-BMI2-NEXT: .p2align 4, 0x90
-; X86-BMI2-NEXT: .LBB93_1: # %atomicrmw.start
-; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-BMI2-NEXT: movl %eax, %edi
-; X86-BMI2-NEXT: orl %esi, %edi
-; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-BMI2-NEXT: jne .LBB93_1
-; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X86-BMI2-NEXT: btl %ecx, %eax
-; X86-BMI2-NEXT: jae .LBB93_3
-; X86-BMI2-NEXT: # %bb.4: # %if.then
-; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax
-; X86-BMI2-NEXT: jmp .LBB93_5
-; X86-BMI2-NEXT: .LBB93_3:
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: andl $31, %edx
+; X86-BMI2-NEXT: lock btsl %edx, (%ecx)
+; X86-BMI2-NEXT: jae .LBB93_1
+; X86-BMI2-NEXT: # %bb.2: # %if.then
+; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax
+; X86-BMI2-NEXT: retl
+; X86-BMI2-NEXT: .LBB93_1:
; X86-BMI2-NEXT: movl $123, %eax
-; X86-BMI2-NEXT: .LBB93_5: # %return
-; X86-BMI2-NEXT: popl %esi
-; X86-BMI2-NEXT: popl %edi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brnz:
; X64-NOBMI2: # %bb.0: # %entry
-; X64-NOBMI2-NEXT: movl %esi, %ecx
-; X64-NOBMI2-NEXT: movl $1, %edx
-; X64-NOBMI2-NEXT: shll %cl, %edx
-; X64-NOBMI2-NEXT: movl (%rdi), %eax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB93_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movl %eax, %esi
-; X64-NOBMI2-NEXT: orl %edx, %esi
-; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB93_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: btl %ecx, %eax
-; X64-NOBMI2-NEXT: jae .LBB93_3
-; X64-NOBMI2-NEXT: # %bb.4: # %if.then
-; X64-NOBMI2-NEXT: movl %ecx, %eax
+; X64-NOBMI2-NEXT: movl %esi, %eax
+; X64-NOBMI2-NEXT: andl $31, %eax
+; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB93_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movl %esi, %eax
; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax
; X64-NOBMI2-NEXT: retq
-; X64-NOBMI2-NEXT: .LBB93_3:
+; X64-NOBMI2-NEXT: .LBB93_1:
; X64-NOBMI2-NEXT: movl $123, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_mask1_or_32_gpr_brnz:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx
-; X64-BMI2-NEXT: movl (%rdi), %eax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB93_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movl %eax, %edx
-; X64-BMI2-NEXT: orl %ecx, %edx
-; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi)
-; X64-BMI2-NEXT: jne .LBB93_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: btl %esi, %eax
-; X64-BMI2-NEXT: jae .LBB93_3
-; X64-BMI2-NEXT: # %bb.4: # %if.then
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andl $31, %eax
+; X64-BMI2-NEXT: lock btsl %eax, (%rdi)
+; X64-BMI2-NEXT: jae .LBB93_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
; X64-BMI2-NEXT: movl %esi, %eax
; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax
; X64-BMI2-NEXT: retq
-; X64-BMI2-NEXT: .LBB93_3:
+; X64-BMI2-NEXT: .LBB93_1:
; X64-BMI2-NEXT: movl $123, %eax
; X64-BMI2-NEXT: retq
entry:
@@ -10485,106 +9395,59 @@ define i32 @atomic_shl1_mask01_or_32_gpr_brnz(ptr %v, i32 %c) nounwind {
; X86-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brnz:
; X86-NOBMI2: # %bb.0: # %entry
-; X86-NOBMI2-NEXT: pushl %edi
-; X86-NOBMI2-NEXT: pushl %esi
+; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI2-NEXT: movl $1, %esi
-; X86-NOBMI2-NEXT: shll %cl, %esi
-; X86-NOBMI2-NEXT: movl (%edx), %eax
-; X86-NOBMI2-NEXT: .p2align 4, 0x90
-; X86-NOBMI2-NEXT: .LBB94_1: # %atomicrmw.start
-; X86-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOBMI2-NEXT: movl %eax, %edi
-; X86-NOBMI2-NEXT: orl %esi, %edi
-; X86-NOBMI2-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-NOBMI2-NEXT: jne .LBB94_1
-; X86-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NOBMI2-NEXT: testl %esi, %eax
-; X86-NOBMI2-NEXT: je .LBB94_3
-; X86-NOBMI2-NEXT: # %bb.4: # %if.then
-; X86-NOBMI2-NEXT: movl (%edx,%ecx,4), %eax
-; X86-NOBMI2-NEXT: jmp .LBB94_5
-; X86-NOBMI2-NEXT: .LBB94_3:
+; X86-NOBMI2-NEXT: movl %eax, %edx
+; X86-NOBMI2-NEXT: andl $31, %edx
+; X86-NOBMI2-NEXT: lock btsl %edx, (%ecx)
+; X86-NOBMI2-NEXT: jae .LBB94_1
+; X86-NOBMI2-NEXT: # %bb.2: # %if.then
+; X86-NOBMI2-NEXT: movl (%ecx,%eax,4), %eax
+; X86-NOBMI2-NEXT: retl
+; X86-NOBMI2-NEXT: .LBB94_1:
; X86-NOBMI2-NEXT: movl $123, %eax
-; X86-NOBMI2-NEXT: .LBB94_5: # %return
-; X86-NOBMI2-NEXT: popl %esi
-; X86-NOBMI2-NEXT: popl %edi
; X86-NOBMI2-NEXT: retl
-;
-; X86-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brnz:
-; X86-BMI2: # %bb.0: # %entry
-; X86-BMI2-NEXT: pushl %edi
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-NEXT: movl $1, %eax
-; X86-BMI2-NEXT: shlxl %ecx, %eax, %esi
-; X86-BMI2-NEXT: movl (%edx), %eax
-; X86-BMI2-NEXT: .p2align 4, 0x90
-; X86-BMI2-NEXT: .LBB94_1: # %atomicrmw.start
-; X86-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-BMI2-NEXT: movl %eax, %edi
-; X86-BMI2-NEXT: orl %esi, %edi
-; X86-BMI2-NEXT: lock cmpxchgl %edi, (%edx)
-; X86-BMI2-NEXT: jne .LBB94_1
-; X86-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X86-BMI2-NEXT: testl %esi, %eax
-; X86-BMI2-NEXT: je .LBB94_3
-; X86-BMI2-NEXT: # %bb.4: # %if.then
-; X86-BMI2-NEXT: movl (%edx,%ecx,4), %eax
-; X86-BMI2-NEXT: jmp .LBB94_5
-; X86-BMI2-NEXT: .LBB94_3:
+;
+; X86-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brnz:
+; X86-BMI2: # %bb.0: # %entry
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl %eax, %edx
+; X86-BMI2-NEXT: andl $31, %edx
+; X86-BMI2-NEXT: lock btsl %edx, (%ecx)
+; X86-BMI2-NEXT: jae .LBB94_1
+; X86-BMI2-NEXT: # %bb.2: # %if.then
+; X86-BMI2-NEXT: movl (%ecx,%eax,4), %eax
+; X86-BMI2-NEXT: retl
+; X86-BMI2-NEXT: .LBB94_1:
; X86-BMI2-NEXT: movl $123, %eax
-; X86-BMI2-NEXT: .LBB94_5: # %return
-; X86-BMI2-NEXT: popl %esi
-; X86-BMI2-NEXT: popl %edi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brnz:
; X64-NOBMI2: # %bb.0: # %entry
-; X64-NOBMI2-NEXT: movl %esi, %ecx
-; X64-NOBMI2-NEXT: movl $1, %edx
-; X64-NOBMI2-NEXT: shll %cl, %edx
-; X64-NOBMI2-NEXT: movl (%rdi), %eax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB94_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movl %eax, %esi
-; X64-NOBMI2-NEXT: orl %edx, %esi
-; X64-NOBMI2-NEXT: lock cmpxchgl %esi, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB94_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: testl %edx, %eax
-; X64-NOBMI2-NEXT: je .LBB94_3
-; X64-NOBMI2-NEXT: # %bb.4: # %if.then
-; X64-NOBMI2-NEXT: movl %ecx, %eax
+; X64-NOBMI2-NEXT: movl %esi, %eax
+; X64-NOBMI2-NEXT: andl $31, %eax
+; X64-NOBMI2-NEXT: lock btsl %eax, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB94_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movl %esi, %eax
; X64-NOBMI2-NEXT: movl (%rdi,%rax,4), %eax
; X64-NOBMI2-NEXT: retq
-; X64-NOBMI2-NEXT: .LBB94_3:
+; X64-NOBMI2-NEXT: .LBB94_1:
; X64-NOBMI2-NEXT: movl $123, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_mask01_or_32_gpr_brnz:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxl %esi, %eax, %ecx
-; X64-BMI2-NEXT: movl (%rdi), %eax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB94_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movl %eax, %edx
-; X64-BMI2-NEXT: orl %ecx, %edx
-; X64-BMI2-NEXT: lock cmpxchgl %edx, (%rdi)
-; X64-BMI2-NEXT: jne .LBB94_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: testl %ecx, %eax
-; X64-BMI2-NEXT: je .LBB94_3
-; X64-BMI2-NEXT: # %bb.4: # %if.then
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andl $31, %eax
+; X64-BMI2-NEXT: lock btsl %eax, (%rdi)
+; X64-BMI2-NEXT: jae .LBB94_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
; X64-BMI2-NEXT: movl %esi, %eax
; X64-BMI2-NEXT: movl (%rdi,%rax,4), %eax
; X64-BMI2-NEXT: retq
-; X64-BMI2-NEXT: .LBB94_3:
+; X64-BMI2-NEXT: .LBB94_1:
; X64-BMI2-NEXT: movl $123, %eax
; X64-BMI2-NEXT: retq
entry:
@@ -12958,35 +11821,21 @@
; X64-NOBMI2-LABEL: atomic_shl1_xor_64_gpr_val:
; X64-NOBMI2: # %bb.0: # %entry
; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: andl $63, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: lock btcq %rcx, (%rdi)
+; X64-NOBMI2-NEXT: setb %al
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB122_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %rcx
-; X64-NOBMI2-NEXT: xorq %rdx, %rcx
-; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB122_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: andq %rdx, %rax
+; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_xor_64_gpr_val:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB122_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %rdx
-; X64-BMI2-NEXT: xorq %rcx, %rdx
-; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi)
-; X64-BMI2-NEXT: jne .LBB122_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: andq %rcx, %rax
+; X64-BMI2-NEXT: andl $63, %esi
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: lock btcq %rsi, (%rdi)
+; X64-BMI2-NEXT: setb %al
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
entry:
%shl = shl nuw i64 1, %c
@@ -13319,37 +12168,21 @@
; X64-NOBMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_val:
; X64-NOBMI2: # %bb.0: # %entry
; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: andb $31, %cl
-; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: andl $31, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: lock btcq %rcx, (%rdi)
+; X64-NOBMI2-NEXT: setb %al
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB125_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %rcx
-; X64-NOBMI2-NEXT: xorq %rdx, %rcx
-; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB125_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: andl %edx, %eax
+; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_small_mask_xor_64_gpr_val:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: andb $31, %sil
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB125_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %rdx
-; X64-BMI2-NEXT: xorq %rcx, %rdx
-; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi)
-; X64-BMI2-NEXT: jne .LBB125_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: andl %ecx, %eax
+; X64-BMI2-NEXT: andl $31, %esi
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: lock btcq %rsi, (%rdi)
+; X64-BMI2-NEXT: setb %al
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
entry:
%rem = and i64 %c, 31
@@ -13461,39 +12294,21 @@
; X64-NOBMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_val:
; X64-NOBMI2: # %bb.0: # %entry
; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: movl $1, %edx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB126_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %rsi
-; X64-NOBMI2-NEXT: xorq %rdx, %rsi
-; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB126_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: andl $63, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: lock btcq %rcx, (%rdi)
+; X64-NOBMI2-NEXT: setb %al
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: andq %rdx, %rax
+; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_mask0_xor_64_gpr_val:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB126_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %rdx
-; X64-BMI2-NEXT: xorq %rcx, %rdx
-; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi)
-; X64-BMI2-NEXT: jne .LBB126_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: movl $1, %ecx
-; X64-BMI2-NEXT: shlxq %rsi, %rcx, %rcx
-; X64-BMI2-NEXT: andq %rcx, %rax
+; X64-BMI2-NEXT: andl $63, %esi
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: lock btcq %rsi, (%rdi)
+; X64-BMI2-NEXT: setb %al
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
entry:
%rem = and i64 %c, 63
@@ -13606,39 +12421,21 @@
; X64-NOBMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_val:
; X64-NOBMI2: # %bb.0: # %entry
; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: movl $1, %edx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB127_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %rsi
-; X64-NOBMI2-NEXT: xorq %rdx, %rsi
-; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB127_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: andl $63, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: lock btcq %rcx, (%rdi)
+; X64-NOBMI2-NEXT: setb %al
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: andq %rdx, %rax
+; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_mask1_xor_64_gpr_val:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB127_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %rdx
-; X64-BMI2-NEXT: xorq %rcx, %rdx
-; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi)
-; X64-BMI2-NEXT: jne .LBB127_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: movl $1, %ecx
-; X64-BMI2-NEXT: shlxq %rsi, %rcx, %rcx
-; X64-BMI2-NEXT: andq %rcx, %rax
+; X64-BMI2-NEXT: andl $63, %esi
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: lock btcq %rsi, (%rdi)
+; X64-BMI2-NEXT: setb %al
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
entry:
%shl = shl nuw i64 1, %c
@@ -13729,35 +12526,21 @@
; X64-NOBMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_val:
; X64-NOBMI2: # %bb.0: # %entry
; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: andl $63, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: lock btcq %rcx, (%rdi)
+; X64-NOBMI2-NEXT: setb %al
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB128_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %rcx
-; X64-NOBMI2-NEXT: xorq %rdx, %rcx
-; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB128_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: andq %rdx, %rax
+; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_mask01_xor_64_gpr_val:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxq %rsi, %rax, %rcx
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB128_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %rdx
-; X64-BMI2-NEXT: xorq %rcx, %rdx
-; X64-BMI2-NEXT: lock cmpxchgq %rdx, (%rdi)
-; X64-BMI2-NEXT: jne .LBB128_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: andq %rcx, %rax
+; X64-BMI2-NEXT: andl $63, %esi
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: lock btcq %rsi, (%rdi)
+; X64-BMI2-NEXT: setb %al
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
entry:
%rem = and i64 %c, 63
@@ -16172,41 +14955,21 @@
; X64-NOBMI2-LABEL: atomic_shl1_and_64_gpr_val:
; X64-NOBMI2: # %bb.0: # %entry
; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: movl $1, %edx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: movq $-2, %rsi
+; X64-NOBMI2-NEXT: andl $63, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: lock btrq %rcx, (%rdi)
+; X64-NOBMI2-NEXT: setb %al
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT: rolq %cl, %rsi
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB146_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %rcx
-; X64-NOBMI2-NEXT: andq %rsi, %rcx
-; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB146_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: andq %rdx, %rax
+; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_and_64_gpr_val:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movq %rsi, %rcx
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxq %rsi, %rax, %rdx
-; X64-BMI2-NEXT: movq $-2, %rsi
-; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI2-NEXT: rolq %cl, %rsi
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB146_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %rcx
-; X64-BMI2-NEXT: andq %rsi, %rcx
-; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi)
-; X64-BMI2-NEXT: jne .LBB146_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: andq %rdx, %rax
+; X64-BMI2-NEXT: andl $63, %esi
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: lock btrq %rsi, (%rdi)
+; X64-BMI2-NEXT: setb %al
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
entry:
%shl = shl nuw i64 1, %c
@@ -16582,43 +15345,21 @@
; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_val:
; X64-NOBMI2: # %bb.0: # %entry
; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: andb $31, %cl
-; X64-NOBMI2-NEXT: movl $1, %edx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: movq $-2, %rsi
+; X64-NOBMI2-NEXT: andl $31, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: lock btrq %rcx, (%rdi)
+; X64-NOBMI2-NEXT: setb %al
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT: rolq %cl, %rsi
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB149_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %rcx
-; X64-NOBMI2-NEXT: andq %rsi, %rcx
-; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB149_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: andl %edx, %eax
+; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_val:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movq %rsi, %rcx
-; X64-BMI2-NEXT: andb $31, %cl
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxq %rcx, %rax, %rdx
-; X64-BMI2-NEXT: movq $-2, %rsi
-; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI2-NEXT: rolq %cl, %rsi
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB149_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %rcx
-; X64-BMI2-NEXT: andq %rsi, %rcx
-; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi)
-; X64-BMI2-NEXT: jne .LBB149_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: andl %edx, %eax
+; X64-BMI2-NEXT: andl $31, %esi
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: lock btrq %rsi, (%rdi)
+; X64-BMI2-NEXT: setb %al
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
entry:
%rem = and i64 %c, 31
@@ -16729,46 +15470,27 @@
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: popl %edi
; X86-BMI2-NEXT: popl %ebx
-; X86-BMI2-NEXT: popl %ebp
-; X86-BMI2-NEXT: retl
-;
-; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_val:
-; X64-NOBMI2: # %bb.0: # %entry
-; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: movq $-2, %rdx
-; X64-NOBMI2-NEXT: rolq %cl, %rdx
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB150_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %rsi
-; X64-NOBMI2-NEXT: andq %rdx, %rsi
-; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB150_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: movl $1, %edx
+; X86-BMI2-NEXT: popl %ebp
+; X86-BMI2-NEXT: retl
+;
+; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_val:
+; X64-NOBMI2: # %bb.0: # %entry
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
+; X64-NOBMI2-NEXT: andl $63, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: lock btrq %rcx, (%rdi)
+; X64-NOBMI2-NEXT: setb %al
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: andq %rdx, %rax
+; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_val:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movq %rsi, %rcx
-; X64-BMI2-NEXT: movq $-2, %rdx
-; X64-BMI2-NEXT: rolq %cl, %rdx
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB150_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %rsi
-; X64-BMI2-NEXT: andq %rdx, %rsi
-; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi)
-; X64-BMI2-NEXT: jne .LBB150_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: movl $1, %edx
-; X64-BMI2-NEXT: shlxq %rcx, %rdx, %rcx
-; X64-BMI2-NEXT: andq %rcx, %rax
+; X64-BMI2-NEXT: andl $63, %esi
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: lock btrq %rsi, (%rdi)
+; X64-BMI2-NEXT: setb %al
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
entry:
%rem = and i64 %c, 63
@@ -16886,40 +15608,21 @@
; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_val:
; X64-NOBMI2: # %bb.0: # %entry
; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: movq $-2, %rdx
-; X64-NOBMI2-NEXT: rolq %cl, %rdx
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB151_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %rsi
-; X64-NOBMI2-NEXT: andq %rdx, %rsi
-; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB151_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: movl $1, %edx
+; X64-NOBMI2-NEXT: andl $63, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: lock btrq %rcx, (%rdi)
+; X64-NOBMI2-NEXT: setb %al
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: andq %rdx, %rax
+; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_val:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movq %rsi, %rcx
-; X64-BMI2-NEXT: movq $-2, %rdx
-; X64-BMI2-NEXT: rolq %cl, %rdx
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB151_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %rsi
-; X64-BMI2-NEXT: andq %rdx, %rsi
-; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi)
-; X64-BMI2-NEXT: jne .LBB151_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: movl $1, %edx
-; X64-BMI2-NEXT: shlxq %rcx, %rdx, %rcx
-; X64-BMI2-NEXT: andq %rcx, %rax
+; X64-BMI2-NEXT: andl $63, %esi
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: lock btrq %rsi, (%rdi)
+; X64-BMI2-NEXT: setb %al
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
entry:
%shl = shl nuw i64 1, %c
@@ -17023,41 +15726,21 @@
; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_val:
; X64-NOBMI2: # %bb.0: # %entry
; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: movl $1, %edx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: movq $-2, %rsi
+; X64-NOBMI2-NEXT: andl $63, %ecx
+; X64-NOBMI2-NEXT: xorl %eax, %eax
+; X64-NOBMI2-NEXT: lock btrq %rcx, (%rdi)
+; X64-NOBMI2-NEXT: setb %al
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI2-NEXT: rolq %cl, %rsi
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB152_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %rcx
-; X64-NOBMI2-NEXT: andq %rsi, %rcx
-; X64-NOBMI2-NEXT: lock cmpxchgq %rcx, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB152_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: andq %rdx, %rax
+; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_val:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movq %rsi, %rcx
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxq %rsi, %rax, %rdx
-; X64-BMI2-NEXT: movq $-2, %rsi
-; X64-BMI2-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI2-NEXT: rolq %cl, %rsi
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB152_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %rcx
-; X64-BMI2-NEXT: andq %rsi, %rcx
-; X64-BMI2-NEXT: lock cmpxchgq %rcx, (%rdi)
-; X64-BMI2-NEXT: jne .LBB152_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: andq %rdx, %rax
+; X64-BMI2-NEXT: andl $63, %esi
+; X64-BMI2-NEXT: xorl %eax, %eax
+; X64-BMI2-NEXT: lock btrq %rsi, (%rdi)
+; X64-BMI2-NEXT: setb %al
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
entry:
%rem = and i64 %c, 63
@@ -18473,51 +17156,27 @@
;
; X64-NOBMI2-LABEL: atomic_shl1_and_64_gpr_brnz:
; X64-NOBMI2: # %bb.0: # %entry
-; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: movl $1, %edx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: movq $-2, %rsi
-; X64-NOBMI2-NEXT: rolq %cl, %rsi
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB162_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %r8
-; X64-NOBMI2-NEXT: andq %rsi, %r8
-; X64-NOBMI2-NEXT: lock cmpxchgq %r8, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB162_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: testq %rdx, %rax
-; X64-NOBMI2-NEXT: je .LBB162_3
-; X64-NOBMI2-NEXT: # %bb.4: # %if.then
-; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax
+; X64-NOBMI2-NEXT: movl %esi, %eax
+; X64-NOBMI2-NEXT: andl $63, %eax
+; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB162_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax
; X64-NOBMI2-NEXT: retq
-; X64-NOBMI2-NEXT: .LBB162_3:
+; X64-NOBMI2-NEXT: .LBB162_1:
; X64-NOBMI2-NEXT: movl $123, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_and_64_gpr_brnz:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movq %rsi, %rcx
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxq %rsi, %rax, %rdx
-; X64-BMI2-NEXT: movq $-2, %rsi
-; X64-BMI2-NEXT: rolq %cl, %rsi
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB162_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %r8
-; X64-BMI2-NEXT: andq %rsi, %r8
-; X64-BMI2-NEXT: lock cmpxchgq %r8, (%rdi)
-; X64-BMI2-NEXT: jne .LBB162_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: testq %rdx, %rax
-; X64-BMI2-NEXT: je .LBB162_3
-; X64-BMI2-NEXT: # %bb.4: # %if.then
-; X64-BMI2-NEXT: movq (%rdi,%rcx,8), %rax
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andl $63, %eax
+; X64-BMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-BMI2-NEXT: jae .LBB162_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax
; X64-BMI2-NEXT: retq
-; X64-BMI2-NEXT: .LBB162_3:
+; X64-BMI2-NEXT: .LBB162_1:
; X64-BMI2-NEXT: movl $123, %eax
; X64-BMI2-NEXT: retq
entry:
@@ -19014,53 +17673,25 @@
;
; X64-NOBMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_brnz:
; X64-NOBMI2: # %bb.0: # %entry
-; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: andl $31, %ecx
-; X64-NOBMI2-NEXT: movl $1, %edx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: movq $-2, %rsi
-; X64-NOBMI2-NEXT: rolq %cl, %rsi
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB165_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %r8
-; X64-NOBMI2-NEXT: andq %rsi, %r8
-; X64-NOBMI2-NEXT: lock cmpxchgq %r8, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB165_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: testl %edx, %eax
-; X64-NOBMI2-NEXT: je .LBB165_3
-; X64-NOBMI2-NEXT: # %bb.4: # %if.then
-; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax
+; X64-NOBMI2-NEXT: andl $31, %esi
+; X64-NOBMI2-NEXT: lock btrq %rsi, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB165_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax
; X64-NOBMI2-NEXT: retq
-; X64-NOBMI2-NEXT: .LBB165_3:
+; X64-NOBMI2-NEXT: .LBB165_1:
; X64-NOBMI2-NEXT: movl $123, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_small_mask_and_64_gpr_brnz:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movq %rsi, %rcx
-; X64-BMI2-NEXT: andl $31, %ecx
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxq %rcx, %rax, %rdx
-; X64-BMI2-NEXT: movq $-2, %rsi
-; X64-BMI2-NEXT: rolq %cl, %rsi
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB165_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %r8
-; X64-BMI2-NEXT: andq %rsi, %r8
-; X64-BMI2-NEXT: lock cmpxchgq %r8, (%rdi)
-; X64-BMI2-NEXT: jne .LBB165_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: testl %edx, %eax
-; X64-BMI2-NEXT: je .LBB165_3
-; X64-BMI2-NEXT: # %bb.4: # %if.then
-; X64-BMI2-NEXT: movq (%rdi,%rcx,8), %rax
+; X64-BMI2-NEXT: andl $31, %esi
+; X64-BMI2-NEXT: lock btrq %rsi, (%rdi)
+; X64-BMI2-NEXT: jae .LBB165_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax
; X64-BMI2-NEXT: retq
-; X64-BMI2-NEXT: .LBB165_3:
+; X64-BMI2-NEXT: .LBB165_1:
; X64-BMI2-NEXT: movl $123, %eax
; X64-BMI2-NEXT: retq
entry:
@@ -19208,47 +17839,27 @@
;
; X64-NOBMI2-LABEL: atomic_shl1_mask0_and_64_gpr_brnz:
; X64-NOBMI2: # %bb.0: # %entry
-; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: movq $-2, %rdx
-; X64-NOBMI2-NEXT: rolq %cl, %rdx
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB166_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %rsi
-; X64-NOBMI2-NEXT: andq %rdx, %rsi
-; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB166_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: btq %rcx, %rax
-; X64-NOBMI2-NEXT: jae .LBB166_3
-; X64-NOBMI2-NEXT: # %bb.4: # %if.then
-; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax
+; X64-NOBMI2-NEXT: movl %esi, %eax
+; X64-NOBMI2-NEXT: andl $63, %eax
+; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB166_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax
; X64-NOBMI2-NEXT: retq
-; X64-NOBMI2-NEXT: .LBB166_3:
+; X64-NOBMI2-NEXT: .LBB166_1:
; X64-NOBMI2-NEXT: movl $123, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_mask0_and_64_gpr_brnz:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movq %rsi, %rcx
-; X64-BMI2-NEXT: movq $-2, %rdx
-; X64-BMI2-NEXT: rolq %cl, %rdx
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB166_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %rsi
-; X64-BMI2-NEXT: andq %rdx, %rsi
-; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi)
-; X64-BMI2-NEXT: jne .LBB166_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: btq %rcx, %rax
-; X64-BMI2-NEXT: jae .LBB166_3
-; X64-BMI2-NEXT: # %bb.4: # %if.then
-; X64-BMI2-NEXT: movq (%rdi,%rcx,8), %rax
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andl $63, %eax
+; X64-BMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-BMI2-NEXT: jae .LBB166_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax
; X64-BMI2-NEXT: retq
-; X64-BMI2-NEXT: .LBB166_3:
+; X64-BMI2-NEXT: .LBB166_1:
; X64-BMI2-NEXT: movl $123, %eax
; X64-BMI2-NEXT: retq
entry:
@@ -19397,47 +18008,27 @@
;
; X64-NOBMI2-LABEL: atomic_shl1_mask1_and_64_gpr_brnz:
; X64-NOBMI2: # %bb.0: # %entry
-; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: movq $-2, %rdx
-; X64-NOBMI2-NEXT: rolq %cl, %rdx
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB167_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %rsi
-; X64-NOBMI2-NEXT: andq %rdx, %rsi
-; X64-NOBMI2-NEXT: lock cmpxchgq %rsi, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB167_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: btq %rcx, %rax
-; X64-NOBMI2-NEXT: jae .LBB167_3
-; X64-NOBMI2-NEXT: # %bb.4: # %if.then
-; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax
+; X64-NOBMI2-NEXT: movl %esi, %eax
+; X64-NOBMI2-NEXT: andl $63, %eax
+; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB167_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax
; X64-NOBMI2-NEXT: retq
-; X64-NOBMI2-NEXT: .LBB167_3:
+; X64-NOBMI2-NEXT: .LBB167_1:
; X64-NOBMI2-NEXT: movl $123, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_mask1_and_64_gpr_brnz:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movq %rsi, %rcx
-; X64-BMI2-NEXT: movq $-2, %rdx
-; X64-BMI2-NEXT: rolq %cl, %rdx
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB167_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %rsi
-; X64-BMI2-NEXT: andq %rdx, %rsi
-; X64-BMI2-NEXT: lock cmpxchgq %rsi, (%rdi)
-; X64-BMI2-NEXT: jne .LBB167_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: btq %rcx, %rax
-; X64-BMI2-NEXT: jae .LBB167_3
-; X64-BMI2-NEXT: # %bb.4: # %if.then
-; X64-BMI2-NEXT: movq (%rdi,%rcx,8), %rax
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andl $63, %eax
+; X64-BMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-BMI2-NEXT: jae .LBB167_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax
; X64-BMI2-NEXT: retq
-; X64-BMI2-NEXT: .LBB167_3:
+; X64-BMI2-NEXT: .LBB167_1:
; X64-BMI2-NEXT: movl $123, %eax
; X64-BMI2-NEXT: retq
entry:
@@ -19573,51 +18164,27 @@
;
; X64-NOBMI2-LABEL: atomic_shl1_mask01_and_64_gpr_brnz:
; X64-NOBMI2: # %bb.0: # %entry
-; X64-NOBMI2-NEXT: movq %rsi, %rcx
-; X64-NOBMI2-NEXT: movl $1, %edx
-; X64-NOBMI2-NEXT: shlq %cl, %rdx
-; X64-NOBMI2-NEXT: movq $-2, %rsi
-; X64-NOBMI2-NEXT: rolq %cl, %rsi
-; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: .p2align 4, 0x90
-; X64-NOBMI2-NEXT: .LBB168_1: # %atomicrmw.start
-; X64-NOBMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NOBMI2-NEXT: movq %rax, %r8
-; X64-NOBMI2-NEXT: andq %rsi, %r8
-; X64-NOBMI2-NEXT: lock cmpxchgq %r8, (%rdi)
-; X64-NOBMI2-NEXT: jne .LBB168_1
-; X64-NOBMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-NOBMI2-NEXT: testq %rdx, %rax
-; X64-NOBMI2-NEXT: je .LBB168_3
-; X64-NOBMI2-NEXT: # %bb.4: # %if.then
-; X64-NOBMI2-NEXT: movq (%rdi,%rcx,8), %rax
+; X64-NOBMI2-NEXT: movl %esi, %eax
+; X64-NOBMI2-NEXT: andl $63, %eax
+; X64-NOBMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-NOBMI2-NEXT: jae .LBB168_1
+; X64-NOBMI2-NEXT: # %bb.2: # %if.then
+; X64-NOBMI2-NEXT: movq (%rdi,%rsi,8), %rax
; X64-NOBMI2-NEXT: retq
-; X64-NOBMI2-NEXT: .LBB168_3:
+; X64-NOBMI2-NEXT: .LBB168_1:
; X64-NOBMI2-NEXT: movl $123, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: atomic_shl1_mask01_and_64_gpr_brnz:
; X64-BMI2: # %bb.0: # %entry
-; X64-BMI2-NEXT: movq %rsi, %rcx
-; X64-BMI2-NEXT: movl $1, %eax
-; X64-BMI2-NEXT: shlxq %rsi, %rax, %rdx
-; X64-BMI2-NEXT: movq $-2, %rsi
-; X64-BMI2-NEXT: rolq %cl, %rsi
-; X64-BMI2-NEXT: movq (%rdi), %rax
-; X64-BMI2-NEXT: .p2align 4, 0x90
-; X64-BMI2-NEXT: .LBB168_1: # %atomicrmw.start
-; X64-BMI2-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-BMI2-NEXT: movq %rax, %r8
-; X64-BMI2-NEXT: andq %rsi, %r8
-; X64-BMI2-NEXT: lock cmpxchgq %r8, (%rdi)
-; X64-BMI2-NEXT: jne .LBB168_1
-; X64-BMI2-NEXT: # %bb.2: # %atomicrmw.end
-; X64-BMI2-NEXT: testq %rdx, %rax
-; X64-BMI2-NEXT: je .LBB168_3
-; X64-BMI2-NEXT: # %bb.4: # %if.then
-; X64-BMI2-NEXT: movq (%rdi,%rcx,8), %rax
+; X64-BMI2-NEXT: movl %esi, %eax
+; X64-BMI2-NEXT: andl $63, %eax
+; X64-BMI2-NEXT: lock btrq %rax, (%rdi)
+; X64-BMI2-NEXT: jae .LBB168_1
+; X64-BMI2-NEXT: # %bb.2: # %if.then
+; X64-BMI2-NEXT: movq (%rdi,%rsi,8), %rax
; X64-BMI2-NEXT: retq
-; X64-BMI2-NEXT: .LBB168_3:
+; X64-BMI2-NEXT: .LBB168_1:
; X64-BMI2-NEXT: movl $123, %eax
; X64-BMI2-NEXT: retq
entry: