Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -936,6 +936,10 @@ /// \name Helpers for atomic expansion. /// @{ + /// True if AtomicExpandPass should use emitLoadLinked/emitStoreConditional + /// and expand AtomicCmpXchgInst. + virtual bool hasLoadLinkedStoreConditional() const { return false; } + /// Perform a load-linked operation on Addr, returning a "Value *" with the /// corresponding pointee type. This may entail some non-trivial operations to /// truncate or reconstruct types that will be illegal in the backend. See Index: lib/CodeGen/AtomicExpandPass.cpp =================================================================== --- lib/CodeGen/AtomicExpandPass.cpp +++ lib/CodeGen/AtomicExpandPass.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file contains a pass (at IR level) to replace atomic instructions with -// appropriate (intrinsic-based) ldrex/strex loops. +// either (intrinsic-based) ldrex/strex loops or AtomicCmpXchg. // //===----------------------------------------------------------------------===// @@ -44,6 +44,8 @@ bool expandAtomicLoad(LoadInst *LI); bool expandAtomicStore(StoreInst *SI); bool expandAtomicRMW(AtomicRMWInst *AI); + bool expandAtomicRMWToLLSC(AtomicRMWInst *AI); + bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); }; } @@ -88,7 +90,7 @@ MadeChange |= expandAtomicStore(SI); } else if (RMWI && TargetLowering->shouldExpandAtomicRMWInIR(RMWI)) { MadeChange |= expandAtomicRMW(RMWI); - } else if (CASI) { + } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) { MadeChange |= expandAtomicCmpXchg(CASI); } } @@ -127,9 +129,12 @@ } bool AtomicExpand::expandAtomicStore(StoreInst *SI) { - // The only atomic 64-bit store on ARM is an strexd that succeeds, which means - // we need a loop and the entire instruction is essentially an "atomicrmw - // xchg" that ignores the value loaded. + // This function is only called on atomic stores that are too large to be + // atomic if implemented as a native store. So we replace them by an + // atomic swap, that can be implemented for example as a ldrex/strex on ARM + // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. + // It is the responsibility of the target to only return true in + // shouldExpandAtomicRMW in cases where this is required and possible. IRBuilder<> Builder(SI); AtomicRMWInst *AI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), @@ -141,8 +146,54 @@ } bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) { + if (TM->getSubtargetImpl() + ->getTargetLowering() + ->hasLoadLinkedStoreConditional()) + return expandAtomicRMWToLLSC(AI); + else + return expandAtomicRMWToCmpXchg(AI); +} + +/// Emit IR to implement the given atomicrmw operation on values in registers, +/// returning the new value. 
+static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, + Value *Loaded, Value *Inc) { + Value *NewVal; + switch (Op) { + case AtomicRMWInst::Xchg: + return Inc; + case AtomicRMWInst::Add: + return Builder.CreateAdd(Loaded, Inc, "new"); + case AtomicRMWInst::Sub: + return Builder.CreateSub(Loaded, Inc, "new"); + case AtomicRMWInst::And: + return Builder.CreateAnd(Loaded, Inc, "new"); + case AtomicRMWInst::Nand: + return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); + case AtomicRMWInst::Or: + return Builder.CreateOr(Loaded, Inc, "new"); + case AtomicRMWInst::Xor: + return Builder.CreateXor(Loaded, Inc, "new"); + case AtomicRMWInst::Max: + NewVal = Builder.CreateICmpSGT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::Min: + NewVal = Builder.CreateICmpSLE(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::UMax: + NewVal = Builder.CreateICmpUGT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::UMin: + NewVal = Builder.CreateICmpULE(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + default: + llvm_unreachable("Unknown atomic op"); + } +} + +bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { auto TLI = TM->getSubtargetImpl()->getTargetLowering(); - AtomicOrdering Order = AI->getOrdering(); + AtomicOrdering FenceOrder = AI->getOrdering(); Value *Addr = AI->getPointerOperand(); BasicBlock *BB = AI->getParent(); Function *F = BB->getParent(); @@ -152,7 +203,7 @@ // of everything. Otherwise, emitLeading/TrailingFence are no-op and we // should preserve the ordering. AtomicOrdering MemOpOrder = - TLI->getInsertFencesForAtomic() ? Monotonic : Order; + TLI->getInsertFencesForAtomic() ? Monotonic : FenceOrder; // Given: atomicrmw some_op iN* %addr, iN %incr ordering // @@ -179,56 +230,15 @@ // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - TLI->emitLeadingFence(Builder, Order, /*IsStore=*/true, /*IsLoad=*/true); + TLI->emitLeadingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. 
Builder.SetInsertPoint(LoopBB); Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); - Value *NewVal; - switch (AI->getOperation()) { - case AtomicRMWInst::Xchg: - NewVal = AI->getValOperand(); - break; - case AtomicRMWInst::Add: - NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Sub: - NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::And: - NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Nand: - NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()), - "new"); - break; - case AtomicRMWInst::Or: - NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Xor: - NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Max: - NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Min: - NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::UMax: - NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::UMin: - NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - default: - llvm_unreachable("Unknown atomic op"); - } + Value *NewVal = + performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); @@ -237,7 +247,7 @@ Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - TLI->emitTrailingFence(Builder, Order, /*IsStore=*/true, /*IsLoad=*/true); + TLI->emitTrailingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true); AI->replaceAllUsesWith(Loaded); AI->eraseFromParent(); @@ -245,6 +255,77 @@ return true; } +bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { + auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering(); + AtomicOrdering FenceOrder = + AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); + AtomicOrdering MemOpOrder = + TargetLowering->getInsertFencesForAtomic() ? Monotonic : FenceOrder; + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // %init_loaded = load atomic iN* %addr + // br label %loop + // loop: + // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] + // %new = some_op iN %loaded, %incr + // %pair = cmpxchg iN* %addr, iN %loaded, iN %new + // %new_loaded = extractvalue { iN, i1 } %pair, 0 + // %success = extractvalue { iN, i1 } %pair, 1 + // br i1 %success, label %atomicrmw.end, label %loop + // atomicrmw.end: + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we want a load. 
It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + TargetLowering->emitLeadingFence(Builder, FenceOrder, + /*IsStore=*/true, /*IsLoad=*/true); + LoadInst *InitLoaded = Builder.CreateLoad(Addr); + // Atomics require at least natural alignment. + InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); + Loaded->addIncoming(InitLoaded, BB); + + Value *NewVal = + performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + + Value *Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, MemOpOrder, + AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); + Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); + Loaded->addIncoming(NewLoaded, LoopBB); + + Value *Success = Builder.CreateExtractValue(Pair, 1, "success"); + Builder.CreateCondBr(Success, ExitBB, LoopBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + TargetLowering->emitTrailingFence(Builder, FenceOrder, + /*IsStore=*/true, /*IsLoad=*/true); + + AI->replaceAllUsesWith(NewLoaded); + AI->eraseFromParent(); + + return true; +} + bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { auto TLI = TM->getSubtargetImpl()->getTargetLowering(); AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -324,6 +324,7 @@ bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; + bool hasLoadLinkedStoreConditional() const override; Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8551,6 +8551,10 @@ return Size <= 128; } +bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const { + return true; +} + Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -392,6 +392,7 @@ bool functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + bool hasLoadLinkedStoreConditional() const override; Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -10982,6 +10982,8 @@ return true; } +bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; } + static void makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Function *DMB = 
llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); Index: lib/Target/X86/CMakeLists.txt =================================================================== --- lib/Target/X86/CMakeLists.txt +++ lib/Target/X86/CMakeLists.txt @@ -14,7 +14,6 @@ set(sources X86AsmPrinter.cpp - X86AtomicExpandPass.cpp X86FastISel.cpp X86FloatingPoint.cpp X86FrameLowering.cpp Index: lib/Target/X86/X86.h =================================================================== --- lib/Target/X86/X86.h +++ lib/Target/X86/X86.h @@ -23,10 +23,6 @@ class ImmutablePass; class X86TargetMachine; -/// createX86AtomicExpandPass - This pass expands atomic operations that cannot -/// be handled natively in terms of a loop using cmpxchg. -FunctionPass *createX86AtomicExpandPass(const X86TargetMachine *TM); - /// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. /// Index: lib/Target/X86/X86AtomicExpandPass.cpp =================================================================== --- lib/Target/X86/X86AtomicExpandPass.cpp +++ /dev/null @@ -1,283 +0,0 @@ -//===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions --0---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass (at IR level) to replace atomic instructions which -// cannot be implemented as a single instruction with cmpxchg-based loops. -// -//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86TargetMachine.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -#define DEBUG_TYPE "x86-atomic-expand" - -namespace { - class X86AtomicExpandPass : public FunctionPass { - const X86TargetMachine *TM; - public: - static char ID; // Pass identification, replacement for typeid - explicit X86AtomicExpandPass(const X86TargetMachine *TM) - : FunctionPass(ID), TM(TM) {} - - bool runOnFunction(Function &F) override; - bool expandAtomicInsts(Function &F); - - bool needsCmpXchgNb(Type *MemType); - - /// There are four kinds of atomic operations. Two never need expanding: - /// cmpxchg is what we expand the others *to*, and loads are easily handled - /// by ISelLowering. Atomicrmw and store can need expanding in some - /// circumstances. - bool shouldExpand(Instruction *Inst); - - /// 128-bit atomic stores (64-bit on i686) need to be implemented in terms - /// of trivial cmpxchg16b loops. A simple store isn't necessarily atomic. - bool shouldExpandStore(StoreInst *SI); - - /// Only some atomicrmw instructions need expanding -- some operations - /// (e.g. max) have absolutely no architectural support; some (e.g. or) have - /// limited support but can't return the previous value; some (e.g. add) - /// have complete support in the instruction set. - /// - /// Also, naturally, 128-bit operations always need to be expanded. 
-    bool shouldExpandAtomicRMW(AtomicRMWInst *AI);
-
-    bool expandAtomicRMW(AtomicRMWInst *AI);
-    bool expandAtomicStore(StoreInst *SI);
-  };
-}
-
-char X86AtomicExpandPass::ID = 0;
-
-FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) {
-  return new X86AtomicExpandPass(TM);
-}
-
-bool X86AtomicExpandPass::runOnFunction(Function &F) {
-  SmallVector<Instruction *, 1> AtomicInsts;
-
-  // Changing control-flow while iterating through it is a bad idea, so gather a
-  // list of all atomic instructions before we start.
-  for (BasicBlock &BB : F)
-    for (Instruction &Inst : BB) {
-      if (isa<AtomicRMWInst>(&Inst) ||
-          (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
-        AtomicInsts.push_back(&Inst);
-    }
-
-  bool MadeChange = false;
-  for (Instruction *Inst : AtomicInsts) {
-    if (!shouldExpand(Inst))
-      continue;
-
-    if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
-      MadeChange |= expandAtomicRMW(AI);
-    if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
-      MadeChange |= expandAtomicStore(SI);
-
-    assert(MadeChange && "Atomic inst not expanded when it should be?");
-    Inst->eraseFromParent();
-  }
-
-  return MadeChange;
-}
-
-/// Returns true if the operand type is 1 step up from the native width, and
-/// the corresponding cmpxchg8b or cmpxchg16b instruction is available
-/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
-bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) {
-  const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
-  unsigned OpWidth = MemType->getPrimitiveSizeInBits();
-
-  if (OpWidth == 64)
-    return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
-  if (OpWidth == 128)
-    return Subtarget.hasCmpxchg16b();
-
-  return false;
-}
-
-bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) {
-  const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
-  unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
-
-  if (needsCmpXchgNb(AI->getType()))
-    return true;
-
-  if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth)
-    return false;
-
-  AtomicRMWInst::BinOp Op = AI->getOperation();
-  switch (Op) {
-  default:
-    llvm_unreachable("Unknown atomic operation");
-  case AtomicRMWInst::Xchg:
-  case AtomicRMWInst::Add:
-  case AtomicRMWInst::Sub:
-    // It's better to use xadd, xsub or xchg for these in all cases.
-    return false;
-  case AtomicRMWInst::Or:
-  case AtomicRMWInst::And:
-  case AtomicRMWInst::Xor:
-    // If the atomicrmw's result isn't actually used, we can just add a "lock"
-    // prefix to a normal instruction for these operations.
-    return !AI->use_empty();
-  case AtomicRMWInst::Nand:
-  case AtomicRMWInst::Max:
-  case AtomicRMWInst::Min:
-  case AtomicRMWInst::UMax:
-  case AtomicRMWInst::UMin:
-    // These always require a non-trivial set of data operations on x86. We must
-    // use a cmpxchg loop.
-    return true;
-  }
-}
-
-bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) {
-  if (needsCmpXchgNb(SI->getValueOperand()->getType()))
-    return true;
-
-  return false;
-}
-
-bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) {
-  if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
-    return shouldExpandAtomicRMW(AI);
-  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
-    return shouldExpandStore(SI);
-  return false;
-}
-
-/// Emit IR to implement the given atomicrmw operation on values in registers,
-/// returning the new value.
-static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, - Value *Loaded, Value *Inc) { - Value *NewVal; - switch (Op) { - case AtomicRMWInst::Xchg: - return Inc; - case AtomicRMWInst::Add: - return Builder.CreateAdd(Loaded, Inc, "new"); - case AtomicRMWInst::Sub: - return Builder.CreateSub(Loaded, Inc, "new"); - case AtomicRMWInst::And: - return Builder.CreateAnd(Loaded, Inc, "new"); - case AtomicRMWInst::Nand: - return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); - case AtomicRMWInst::Or: - return Builder.CreateOr(Loaded, Inc, "new"); - case AtomicRMWInst::Xor: - return Builder.CreateXor(Loaded, Inc, "new"); - case AtomicRMWInst::Max: - NewVal = Builder.CreateICmpSGT(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); - case AtomicRMWInst::Min: - NewVal = Builder.CreateICmpSLE(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); - case AtomicRMWInst::UMax: - NewVal = Builder.CreateICmpUGT(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); - case AtomicRMWInst::UMin: - NewVal = Builder.CreateICmpULE(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); - default: - break; - } - llvm_unreachable("Unknown atomic op"); -} - -bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) { - AtomicOrdering Order = - AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: atomicrmw some_op iN* %addr, iN %incr ordering - // - // The standard expansion we produce is: - // [...] - // %init_loaded = load atomic iN* %addr - // br label %loop - // loop: - // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] - // %new = some_op iN %loaded, %incr - // %pair = cmpxchg iN* %addr, iN %loaded, iN %new - // %new_loaded = extractvalue { iN, i1 } %pair, 0 - // %success = extractvalue { iN, i1 } %pair, 1 - // br i1 %success, label %atomicrmw.end, label %loop - // atomicrmw.end: - // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we want a load. It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - LoadInst *InitLoaded = Builder.CreateLoad(Addr); - InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries. 
-  Builder.SetInsertPoint(LoopBB);
-  PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
-  Loaded->addIncoming(InitLoaded, BB);
-
-  Value *NewVal =
-      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
-
-  Value *Pair = Builder.CreateAtomicCmpXchg(
-      Addr, Loaded, NewVal, Order,
-      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
-  Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
-  Loaded->addIncoming(NewLoaded, LoopBB);
-
-  Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
-  Builder.CreateCondBr(Success, ExitBB, LoopBB);
-
-  AI->replaceAllUsesWith(NewLoaded);
-
-  return true;
-}
-
-bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) {
-  // An atomic store might need cmpxchg16b (or 8b on x86) to execute. Express
-  // this in terms of the usual expansion to "atomicrmw xchg".
-  IRBuilder<> Builder(SI);
-  AtomicOrdering Order =
-      SI->getOrdering() == Unordered ? Monotonic : SI->getOrdering();
-  AtomicRMWInst *AI =
-      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
-                              SI->getValueOperand(), Order);
-
-  // Now we have an appropriate swap instruction, lower it as usual.
-  if (shouldExpandAtomicRMW(AI)) {
-    expandAtomicRMW(AI);
-    AI->eraseFromParent();
-    return true;
-  }
-
-  return AI;
-}
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -960,6 +960,12 @@
 
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
+  bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
+  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
+  bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
+  bool needsCmpXchgNb(const Type *MemType) const;
+
   /// Utility function to emit atomic-load-arith operations (and, or, xor,
   /// nand, max, min, umax, umin). It takes the corresponding instruction to
   /// expand, the associated machine basic block, and the associated X86
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -16771,6 +16771,68 @@
   }
 }
 
+/// Returns true if the operand type is exactly twice the native width, and
+/// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
+/// Used to know whether to use cmpxchg8/16b when expanding atomic operations
+/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
+bool X86TargetLowering::needsCmpXchgNb(const Type *MemType) const {
+  const X86Subtarget &Subtarget =
+      getTargetMachine().getSubtarget<X86Subtarget>();
+  unsigned OpWidth = MemType->getPrimitiveSizeInBits();
+
+  if (OpWidth == 64)
+    return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
+  else if (OpWidth == 128)
+    return Subtarget.hasCmpxchg16b();
+  else
+    return false;
+}
+
+bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
+  return needsCmpXchgNb(SI->getValueOperand()->getType());
+}
+
+bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *SI) const {
+  return false; // FIXME, currently these are expanded separately in this file.
+}
+
+bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  const X86Subtarget &Subtarget =
+      getTargetMachine().getSubtarget<X86Subtarget>();
+  unsigned NativeWidth = Subtarget.is64Bit() ?
64 : 32; + const Type *MemType = AI->getType(); + + // If the operand is too big, we must see if cmpxchg8/16b is available + // and default to library calls otherwise. + if (MemType->getPrimitiveSizeInBits() > NativeWidth) + return needsCmpXchgNb(MemType); + + AtomicRMWInst::BinOp Op = AI->getOperation(); + switch (Op) { + default: + llvm_unreachable("Unknown atomic operation"); + case AtomicRMWInst::Xchg: + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + // It's better to use xadd, xsub or xchg for these in all cases. + return false; + case AtomicRMWInst::Or: + case AtomicRMWInst::And: + case AtomicRMWInst::Xor: + // If the atomicrmw's result isn't actually used, we can just add a "lock" + // prefix to a normal instruction for these operations. + return !AI->use_empty(); + case AtomicRMWInst::Nand: + case AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: + // These always require a non-trivial set of data operations on x86. We must + // use a cmpxchg loop. + return true; + } +} + static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); @@ -17290,7 +17352,7 @@ case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_LOAD_UMAX: // Delegate to generic TypeLegalization. Situations we can really handle - // should have already been dealt with by X86AtomicExpandPass.cpp. + // should have already been dealt with by AtomicExpandPass.cpp. break; case ISD::ATOMIC_LOAD: { ReplaceATOMIC_LOAD(N, Results, DAG); Index: lib/Target/X86/X86TargetMachine.cpp =================================================================== --- lib/Target/X86/X86TargetMachine.cpp +++ lib/Target/X86/X86TargetMachine.cpp @@ -105,7 +105,7 @@ } void X86PassConfig::addIRPasses() { - addPass(createX86AtomicExpandPass(&getX86TargetMachine())); + addPass(createAtomicExpandPass(&getX86TargetMachine())); TargetPassConfig::addIRPasses(); }
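
Note on the new cmpxchg-based path (illustration only, not part of the diff): on a target without LL/SC such as x86-64, an operation like

  %old = atomicrmw nand i64* %ptr, i64 %val seq_cst

is flagged by shouldExpandAtomicRMWInIR (nand has no single-instruction lowering) and rewritten by expandAtomicRMWToCmpXchg into roughly the IR below. This just instantiates the generic iN skeleton from the comment in expandAtomicRMWToCmpXchg. X86 does not set getInsertFencesForAtomic, so the leading/trailing fences are no-ops and the cmpxchg keeps the seq_cst ordering; local value names and the omitted load alignment are illustrative, not emitted verbatim by the pass.

  entry:
    %init_loaded = load i64* %ptr
    br label %atomicrmw.start

  atomicrmw.start:
    %loaded = phi i64 [ %init_loaded, %entry ], [ %newloaded, %atomicrmw.start ]
    %and = and i64 %loaded, %val                  ; performAtomicOp for nand:
    %new = xor i64 %and, -1                       ;   not(and(loaded, val))
    %pair = cmpxchg i64* %ptr, i64 %loaded, i64 %new seq_cst seq_cst
    %newloaded = extractvalue { i64, i1 } %pair, 0
    %success = extractvalue { i64, i1 } %pair, 1
    br i1 %success, label %atomicrmw.end, label %atomicrmw.start

  atomicrmw.end:
    ; every use of %old is replaced with %newloaded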