diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -25,6 +25,7 @@ set(sources
   X86AsmPrinter.cpp
+  X86AtomicANDHoist.cpp
   X86AvoidTrailingCall.cpp
   X86CallFrameOptimization.cpp
   X86CallingConv.cpp
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -145,6 +145,10 @@
 /// ways.
 FunctionPass *createX86PartialReductionPass();
 
+/// This pass hoists an AND that uses the result of an atomic logic
+/// instruction into the same basic block so that ISel can optimize them.
+FunctionPass *createX86AtomicANDHoistPass();
+
 InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
                                                   X86Subtarget &,
                                                   X86RegisterBankInfo &);
@@ -159,6 +163,7 @@
 void initializeFixupLEAPassPass(PassRegistry &);
 void initializeFPSPass(PassRegistry &);
 void initializeWinEHStatePassPass(PassRegistry &);
+void initializeX86AtomicANDHoistPass(PassRegistry &);
 void initializeX86AvoidSFBPassPass(PassRegistry &);
 void initializeX86AvoidTrailingCallPassPass(PassRegistry &);
 void initializeX86CallFrameOptimizationPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86AtomicANDHoist.cpp b/llvm/lib/Target/X86/X86AtomicANDHoist.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/X86/X86AtomicANDHoist.cpp
@@ -0,0 +1,66 @@
+//===- X86AtomicANDHoist.cpp - Hoist ANDs that use atomic logic ops ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-atomic-and-hoist"
+
+namespace {
+
+class X86AtomicANDHoist : public FunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid.
+
+  X86AtomicANDHoist() : FunctionPass(ID) {}
+
+  bool runOnFunction(Function &F) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+  }
+
+  StringRef getPassName() const override { return "X86 Atomic AND Hoist"; }
+};
+} // namespace
+
+FunctionPass *llvm::createX86AtomicANDHoistPass() {
+  return new X86AtomicANDHoist();
+}
+
+char X86AtomicANDHoist::ID = 0;
+
+INITIALIZE_PASS(X86AtomicANDHoist, DEBUG_TYPE, "X86 Atomic AND Hoist", false,
+                false)
+
+bool X86AtomicANDHoist::runOnFunction(Function &F) {
+  bool Changed = false;
+
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      auto *AI = dyn_cast<AtomicRMWInst>(&I);
+      if (!AI || !AI->hasOneUse())
+        continue;
+      Instruction *And = AI->user_back();
+      if (And->getOpcode() != Instruction::And || And->getParent() == &BB ||
+          !isa<ConstantInt>(And->getOperand(1)))
+        continue;
+      AtomicRMWInst::BinOp Op = AI->getOperation();
+      if (Op != AtomicRMWInst::Or && Op != AtomicRMWInst::And &&
+          Op != AtomicRMWInst::Xor)
+        continue;
+      Changed = true;
+      And->moveAfter(AI);
+    }
+  }
+
+  return Changed;
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -790,6 +790,9 @@
     LOR,
     LXOR,
     LAND,
+    LBTS,
+    LBTC,
+    LBTR,
 
     // Load, scalar_to_vector, and zero extend.
    VZEXT_LOAD,
@@ -1640,6 +1643,8 @@
   bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
   TargetLoweringBase::AtomicExpansionKind
   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+  TargetLoweringBase::AtomicExpansionKind
+  shouldExpandAtomicLogicRMWInIR(AtomicRMWInst *AI) const;
 
   LoadInst *
   lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5685,7 +5685,12 @@
 bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
     const Instruction &AndI) const {
-  return true;
+  auto *AI = dyn_cast<AtomicRMWInst>(AndI.getOperand(0));
+  if (!AI)
+    return true;
+  AtomicRMWInst::BinOp Op = AI->getOperation();
+  return Op != AtomicRMWInst::Or && Op != AtomicRMWInst::And &&
+         Op != AtomicRMWInst::Xor;
 }
 
 bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
@@ -30313,6 +30318,33 @@
                                  : AtomicExpansionKind::None;
 }
 
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldExpandAtomicLogicRMWInIR(AtomicRMWInst *AI) const {
+  // If the atomicrmw's result isn't actually used, we can just add a "lock"
+  // prefix to a normal instruction for these operations.
+  if (AI->use_empty())
+    return AtomicExpansionKind::None;
+
+  // If the atomicrmw's result is used by a single-bit AND, we may use the
+  // bts/btr/btc instructions for these operations.
+  auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
+  Instruction *I = AI->user_back();
+  if (!C1 || !AI->hasOneUse() || I->getOpcode() != Instruction::And ||
+      AI->getParent() != I->getParent())
+    return AtomicExpansionKind::CmpXChg;
+  // The user must be an AND with a single-bit constant (no 8-bit bt exists).
+  auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
+  unsigned Bits = AI->getType()->getPrimitiveSizeInBits();
+  if (!C2 || Bits == 8 || !isPowerOf2_64(C2->getZExtValue()))
+    return AtomicExpansionKind::CmpXChg;
+
+  if (AI->getOperation() == AtomicRMWInst::And)
+    return ~C1->getValue() == C2->getValue() ? AtomicExpansionKind::None
+                                             : AtomicExpansionKind::CmpXChg;
+
+  return C1 == C2 ? AtomicExpansionKind::None : AtomicExpansionKind::CmpXChg;
+}
+
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -30337,10 +30369,7 @@
   case AtomicRMWInst::Or:
   case AtomicRMWInst::And:
   case AtomicRMWInst::Xor:
-    // If the atomicrmw's result isn't actually used, we can just add a "lock"
-    // prefix to a normal instruction for these operations.
-    return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
-                            : AtomicExpansionKind::None;
+    return shouldExpandAtomicLogicRMWInIR(AI);
   case AtomicRMWInst::Nand:
   case AtomicRMWInst::Max:
   case AtomicRMWInst::Min:
@@ -31038,6 +31067,40 @@
     return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS,
                          AN->getMemOperand());
   }
+  if (Opc == ISD::ATOMIC_LOAD_OR || Opc == ISD::ATOMIC_LOAD_XOR ||
+      Opc == ISD::ATOMIC_LOAD_AND) {
+    ConstantSDNode *C1 = cast<ConstantSDNode>(RHS);
+    ConstantSDNode *C2 = nullptr;
+    for (auto UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) {
+      // Skip uses of the chain value. Result 0 of the node is the load value.
+      if (UI.getUse().getResNo() != 0)
+        continue;
+      if (C2 || UI->getOpcode() != ISD::AND)
+        report_fatal_error("Atomic result must be used by one AND");
+      C2 = cast<ConstantSDNode>(UI->getOperand(1));
+      assert(isPowerOf2_64(C2->getZExtValue()) && "Must be power of 2 value");
+    }
+    if (Opc == ISD::ATOMIC_LOAD_AND) {
+      assert(~C1->getAPIntValue() == C2->getAPIntValue() &&
+             "Cannot lower to BTR");
+      Opc = X86ISD::LBTR;
+    } else {
+      assert(C1 == C2 && "Cannot lower to BTS/BTC");
+      Opc = Opc == ISD::ATOMIC_LOAD_OR ? X86ISD::LBTS : X86ISD::LBTC;
+    }
+
+    unsigned Imm = countTrailingZeros(C2->getZExtValue());
+    MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
+    SDValue Res = DAG.getMemIntrinsicNode(
+        Opc, DL, DAG.getVTList(VT, MVT::Other),
+        {Chain, LHS, DAG.getConstant(Imm, DL, VT)}, VT, MMO);
+    Chain = Res.getValue(1);
+    Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT);
+    if (Imm)
+      Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+                        DAG.getShiftAmountConstant(Imm, VT, DL));
+    return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Res, Chain);
+  }
   assert(Opc == ISD::ATOMIC_LOAD_ADD &&
          "Used AtomicRMW ops other than Add should have been expanded!");
   return N;
@@ -32821,6 +32884,9 @@
   NODE_NAME_CASE(LOR)
   NODE_NAME_CASE(LXOR)
   NODE_NAME_CASE(LAND)
+  NODE_NAME_CASE(LBTS)
+  NODE_NAME_CASE(LBTC)
+  NODE_NAME_CASE(LBTR)
   NODE_NAME_CASE(VZEXT_MOVL)
   NODE_NAME_CASE(VZEXT_LOAD)
   NODE_NAME_CASE(VEXTRACT_STORE)
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -839,6 +839,28 @@
 def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
 }
 
+multiclass ATOMIC_LOG<Format Form, string mnemonic, SDNode frag> {
+  let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+      SchedRW = [WriteBitTestSetRegRMW] in {
+    def 16m : Ii8<0xBA, Form, (outs), (ins i16mem:$src1, i16imm:$src2),
+                  !strconcat(mnemonic, "{w}\t{$src2, $src1|$src1, $src2}"),
+                  [(set EFLAGS, (frag addr:$src1, (i16 imm:$src2)))]>,
+              OpSize16, TB, LOCK;
+    def 32m : Ii8<0xBA, Form, (outs), (ins i32mem:$src1, i32imm:$src2),
+                  !strconcat(mnemonic, "{l}\t{$src2, $src1|$src1, $src2}"),
+                  [(set EFLAGS, (frag addr:$src1, (i32 imm:$src2)))]>,
+              OpSize32, TB, LOCK;
+    def 64m : RIi8<0xBA, Form, (outs), (ins i64mem:$src1, i64imm:$src2),
+                   !strconcat(mnemonic, "{q}\t{$src2, $src1|$src1, $src2}"),
+                   [(set EFLAGS, (frag addr:$src1, (i64 imm:$src2)))]>,
+               TB, LOCK;
+  }
+}
+
+defm LOCK_BTS : ATOMIC_LOG<MRM5m, "bts", X86lock_bts>;
+defm LOCK_BTR : ATOMIC_LOG<MRM6m, "btr", X86lock_btr>;
+defm LOCK_BTC : ATOMIC_LOG<MRM7m, "btc", X86lock_btc>;
+
 // Atomic compare and swap.
multiclass LCMPXCHG_BinOp Opc8, bits<8> Opc, Format Form, string mnemonic, SDPatternOperator frag> { diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -282,6 +282,15 @@ def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def X86lock_bts : SDNode<"X86ISD::LBTS", SDTLockBinaryArithWithFlags, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, + SDNPMemOperand]>; +def X86lock_btc : SDNode<"X86ISD::LBTC", SDTLockBinaryArithWithFlags, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, + SDNPMemOperand]>; +def X86lock_btr : SDNode<"X86ISD::LBTR", SDTLockBinaryArithWithFlags, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, + SDNPMemOperand]>; def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>; def X86bextri : SDNode<"X86ISD::BEXTRI", SDTIntBinOp>; diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -485,6 +485,7 @@ } bool X86PassConfig::addPreISel() { + addPass(createX86AtomicANDHoistPass()); // Only add this pass for 32-bit x86 Windows. const Triple &TT = TM->getTargetTriple(); if (TT.isOSWindows() && TT.getArch() == Triple::x86) diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -31,6 +31,7 @@ ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Expand indirectbr instructions ; CHECK-NEXT: Exception handling preparation +; CHECK-NEXT: X86 Atomic AND Hoist ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier diff --git a/llvm/test/CodeGen/X86/atomic-bit-test.ll b/llvm/test/CodeGen/X86/atomic-bit-test.ll --- a/llvm/test/CodeGen/X86/atomic-bit-test.ll +++ b/llvm/test/CodeGen/X86/atomic-bit-test.ll @@ -9,35 +9,17 @@ define i16 @bts1() nounwind { ; X86-LABEL: bts1: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB0_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $1, %ecx -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB0_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $1, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsw $0, v16 +; X86-NEXT: setb %al ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: bts1: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB0_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl $1, %ecx -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB0_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $1, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsw $0, v16(%rip) +; X64-NEXT: setb %al ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -49,35 +31,19 @@ define i16 @bts2() nounwind { ; X86-LABEL: bts2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB1_1: # 
%atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $2, %ecx -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB1_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $2, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsw $1, v16 +; X86-NEXT: setb %al +; X86-NEXT: addl %eax, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: bts2: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB1_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl $2, %ecx -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB1_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $2, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsw $1, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: addl %eax, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -89,35 +55,19 @@ define i16 @bts15() nounwind { ; X86-LABEL: bts15: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB2_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $32768, %ecx # imm = 0x8000 -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB2_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $32768, %eax # imm = 0x8000 +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsw $15, v16 +; X86-NEXT: setb %al +; X86-NEXT: shll $15, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: bts15: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB2_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl $32768, %ecx # imm = 0x8000 -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB2_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $32768, %eax # imm = 0x8000 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsw $15, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $15, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -129,30 +79,18 @@ define i32 @bts31() nounwind { ; X86-LABEL: bts31: ; X86: # %bb.0: # %entry -; X86-NEXT: movl v32, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB3_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $-2147483648, %ecx # imm = 0x80000000 -; X86-NEXT: lock cmpxchgl %ecx, v32 -; X86-NEXT: jne .LBB3_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsl $31, v32 +; X86-NEXT: setb %al +; X86-NEXT: shll $31, %eax ; X86-NEXT: retl ; ; X64-LABEL: bts31: ; X64: # %bb.0: # %entry -; X64-NEXT: movl v32(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB3_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl $-2147483648, 
%ecx # imm = 0x80000000 -; X64-NEXT: lock cmpxchgl %ecx, v32(%rip) -; X64-NEXT: jne .LBB3_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsl $31, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $31, %eax ; X64-NEXT: retq entry: %0 = atomicrmw or i32* @v32, i32 2147483648 monotonic, align 4 @@ -185,17 +123,10 @@ ; ; X64-LABEL: bts63: ; X64: # %bb.0: # %entry -; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; X64-NEXT: movq v64(%rip), %rax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB4_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movq %rax, %rdx -; X64-NEXT: orq %rcx, %rdx -; X64-NEXT: lock cmpxchgq %rdx, v64(%rip) -; X64-NEXT: jne .LBB4_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andq %rcx, %rax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsq $63, v64(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shlq $63, %rax ; X64-NEXT: retq entry: %0 = atomicrmw or i64* @v64, i64 -9223372036854775808 monotonic, align 8 @@ -206,35 +137,17 @@ define i16 @btc1() nounwind { ; X86-LABEL: btc1: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB5_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: xorl $1, %ecx -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB5_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $1, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcw $0, v16 +; X86-NEXT: setb %al ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: btc1: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB5_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: xorl $1, %ecx -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB5_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $1, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcw $0, v16(%rip) +; X64-NEXT: setb %al ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -246,35 +159,19 @@ define i16 @btc2() nounwind { ; X86-LABEL: btc2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB6_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: xorl $2, %ecx -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB6_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $2, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcw $1, v16 +; X86-NEXT: setb %al +; X86-NEXT: addl %eax, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: btc2: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB6_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: xorl $2, %ecx -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: 
jne .LBB6_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $2, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcw $1, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: addl %eax, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -286,35 +183,19 @@ define i16 @btc15() nounwind { ; X86-LABEL: btc15: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB7_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: xorl $32768, %ecx # imm = 0x8000 -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB7_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $32768, %eax # imm = 0x8000 +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcw $15, v16 +; X86-NEXT: setb %al +; X86-NEXT: shll $15, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: btc15: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB7_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: xorl $32768, %ecx # imm = 0x8000 -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB7_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $32768, %eax # imm = 0x8000 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcw $15, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $15, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -326,30 +207,18 @@ define i32 @btc31() nounwind { ; X86-LABEL: btc31: ; X86: # %bb.0: # %entry -; X86-NEXT: movl v32, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB8_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: xorl $-2147483648, %ecx # imm = 0x80000000 -; X86-NEXT: lock cmpxchgl %ecx, v32 -; X86-NEXT: jne .LBB8_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btcl $31, v32 +; X86-NEXT: setb %al +; X86-NEXT: shll $31, %eax ; X86-NEXT: retl ; ; X64-LABEL: btc31: ; X64: # %bb.0: # %entry -; X64-NEXT: movl v32(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB8_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: xorl $-2147483648, %ecx # imm = 0x80000000 -; X64-NEXT: lock cmpxchgl %ecx, v32(%rip) -; X64-NEXT: jne .LBB8_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btcl $31, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $31, %eax ; X64-NEXT: retq entry: %0 = atomicrmw xor i32* @v32, i32 2147483648 monotonic, align 4 @@ -382,17 +251,10 @@ ; ; X64-LABEL: btc63: ; X64: # %bb.0: # %entry -; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; X64-NEXT: movq v64(%rip), %rax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB9_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movq %rax, %rdx -; X64-NEXT: xorq %rcx, %rdx -; X64-NEXT: lock cmpxchgq %rdx, v64(%rip) -; X64-NEXT: jne .LBB9_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andq %rcx, %rax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock 
btcq $63, v64(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shlq $63, %rax ; X64-NEXT: retq entry: %0 = atomicrmw xor i64* @v64, i64 -9223372036854775808 monotonic, align 8 @@ -403,35 +265,17 @@ define i16 @btr1() nounwind { ; X86-LABEL: btr1: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB10_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $65534, %ecx # imm = 0xFFFE -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB10_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $1, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrw $0, v16 +; X86-NEXT: setb %al ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: btr1: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB10_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andl $65534, %ecx # imm = 0xFFFE -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB10_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $1, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrw $0, v16(%rip) +; X64-NEXT: setb %al ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -443,35 +287,19 @@ define i16 @btr2() nounwind { ; X86-LABEL: btr2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB11_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $65533, %ecx # imm = 0xFFFD -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB11_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $2, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrw $1, v16 +; X86-NEXT: setb %al +; X86-NEXT: addl %eax, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: btr2: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB11_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andl $65533, %ecx # imm = 0xFFFD -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB11_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $2, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrw $1, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: addl %eax, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -483,35 +311,19 @@ define i16 @btr15() nounwind { ; X86-LABEL: btr15: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB12_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB12_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $32768, %eax # imm = 0x8000 +; 
X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrw $15, v16 +; X86-NEXT: setb %al +; X86-NEXT: shll $15, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: btr15: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB12_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andl $32767, %ecx # imm = 0x7FFF -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $eax -; X64-NEXT: jne .LBB12_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $32768, %eax # imm = 0x8000 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrw $15, v16(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $15, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq entry: @@ -523,30 +335,18 @@ define i32 @btr31() nounwind { ; X86-LABEL: btr31: ; X86: # %bb.0: # %entry -; X86-NEXT: movl v32, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB13_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; X86-NEXT: lock cmpxchgl %ecx, v32 -; X86-NEXT: jne .LBB13_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btrl $31, v32 +; X86-NEXT: setb %al +; X86-NEXT: shll $31, %eax ; X86-NEXT: retl ; ; X64-LABEL: btr31: ; X64: # %bb.0: # %entry -; X64-NEXT: movl v32(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB13_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF -; X64-NEXT: lock cmpxchgl %ecx, v32(%rip) -; X64-NEXT: jne .LBB13_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000 +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrl $31, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $31, %eax ; X64-NEXT: retq entry: %0 = atomicrmw and i32* @v32, i32 2147483647 monotonic, align 4 @@ -585,18 +385,10 @@ ; ; X64-LABEL: btr63: ; X64: # %bb.0: # %entry -; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; X64-NEXT: movq v64(%rip), %rax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB14_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movq %rax, %rdx -; X64-NEXT: andq %rcx, %rdx -; X64-NEXT: lock cmpxchgq %rdx, v64(%rip) -; X64-NEXT: jne .LBB14_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: incq %rcx -; X64-NEXT: andq %rcx, %rax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btrq $63, v64(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shlq $63, %rax ; X64-NEXT: retq entry: %0 = atomicrmw and i64* @v64, i64 9223372036854775807 monotonic, align 8 @@ -655,36 +447,18 @@ define i16 @multi_use2() nounwind { ; X86-LABEL: multi_use2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzwl v16, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB16_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $1, %ecx -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: lock cmpxchgw %cx, v16 -; X86-NEXT: # kill: def $ax killed $ax def $eax -; X86-NEXT: jne .LBB16_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end -; X86-NEXT: andl $1, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsw $0, v16 +; X86-NEXT: setb %al ; X86-NEXT: leal 
(%eax,%eax,2), %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: multi_use2: ; X64: # %bb.0: # %entry -; X64-NEXT: movzwl v16(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB16_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl $1, %ecx -; X64-NEXT: # kill: def $ax killed $ax killed $rax -; X64-NEXT: lock cmpxchgw %cx, v16(%rip) -; X64-NEXT: # kill: def $ax killed $ax def $rax -; X64-NEXT: jne .LBB16_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end -; X64-NEXT: andl $1, %eax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsw $0, v16(%rip) +; X64-NEXT: setb %al ; X64-NEXT: leal (%rax,%rax,2), %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq @@ -764,39 +538,23 @@ define void @no_and_cmp0_fold() nounwind { ; X86-LABEL: no_and_cmp0_fold: ; X86: # %bb.0: # %entry -; X86-NEXT: movl v32, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB18_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: orl $8, %ecx -; X86-NEXT: lock cmpxchgl %ecx, v32 -; X86-NEXT: jne .LBB18_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: lock btsl $3, v32 ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: testb %al, %al -; X86-NEXT: je .LBB18_3 -; X86-NEXT: # %bb.4: # %if.end +; X86-NEXT: je .LBB18_1 +; X86-NEXT: # %bb.2: # %if.end ; X86-NEXT: retl -; X86-NEXT: .LBB18_3: # %if.then +; X86-NEXT: .LBB18_1: # %if.then ; ; X64-LABEL: no_and_cmp0_fold: ; X64: # %bb.0: # %entry -; X64-NEXT: movl v32(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB18_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl $8, %ecx -; X64-NEXT: lock cmpxchgl %ecx, v32(%rip) -; X64-NEXT: jne .LBB18_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: lock btsl $3, v32(%rip) ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: testb %al, %al -; X64-NEXT: je .LBB18_3 -; X64-NEXT: # %bb.4: # %if.end +; X64-NEXT: je .LBB18_1 +; X64-NEXT: # %bb.2: # %if.end ; X64-NEXT: retq -; X64-NEXT: .LBB18_3: # %if.then +; X64-NEXT: .LBB18_1: # %if.then entry: %0 = atomicrmw or i32* @v32, i32 8 monotonic, align 4 %and = and i32 %0, 8 @@ -815,32 +573,20 @@ ; X86-LABEL: split_hoist_and: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl v32, %eax -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB19_1: # %atomicrmw.start -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %eax, %edx -; X86-NEXT: orl $8, %edx -; X86-NEXT: lock cmpxchgl %edx, v32 -; X86-NEXT: jne .LBB19_1 -; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: lock btsl $3, v32 +; X86-NEXT: setb %al +; X86-NEXT: shll $3, %eax ; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: andl $8, %eax ; X86-NEXT: retl ; ; X64-LABEL: split_hoist_and: ; X64: # %bb.0: -; X64-NEXT: movl v32(%rip), %eax -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB19_1: # %atomicrmw.start -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: orl $8, %ecx -; X64-NEXT: lock cmpxchgl %ecx, v32(%rip) -; X64-NEXT: jne .LBB19_1 -; X64-NEXT: # %bb.2: # %atomicrmw.end +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: lock btsl $3, v32(%rip) +; X64-NEXT: setb %al +; X64-NEXT: shll $3, %eax ; X64-NEXT: testl %edi, %edi -; X64-NEXT: andl $8, %eax ; X64-NEXT: retq %2 = atomicrmw or i32* @v32, i32 8 monotonic, align 4 %3 = tail call i32 @llvm.ctlz.i32(i32 %0, i1 false) diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll 
b/llvm/test/CodeGen/X86/opt-pipeline.ll --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -67,6 +67,7 @@ ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Exception handling preparation +; CHECK-NEXT: X86 Atomic AND Hoist ; CHECK-NEXT: Safe Stack instrumentation pass ; CHECK-NEXT: Insert stack protectors ; CHECK-NEXT: Module Verifier
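
For reference, a minimal source-level sketch of the idiom this patch targets (illustrative only; the variable, function name, and bit position are made up, but per the atomic-bit-test.ll cases above an or/xor/and atomicrmw whose only use is an AND with the same single-bit constant should now select to lock bts/btc/btr plus setb, rather than a cmpxchg loop):

  /* Illustrative C, not part of the patch: atomic_fetch_or_explicit lowers to
     "atomicrmw or ... monotonic", and masking its result with the same
     power-of-2 constant is the pattern shouldExpandAtomicLogicRMWInIR accepts. */
  #include <stdatomic.h>

  _Atomic unsigned flags;

  int test_and_set_bit3(void) {
    /* Expected with this patch: lock btsl $3, flags; setb %al; then a shift
       by 3 to rebuild the masked value, instead of a cmpxchg retry loop. */
    return atomic_fetch_or_explicit(&flags, 8u, memory_order_relaxed) & 8u;
  }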