Index: llvm/trunk/include/llvm/Target/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h
+++ llvm/trunk/include/llvm/Target/TargetLowering.h
@@ -968,9 +968,13 @@
   /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad.
   /// RMW and CmpXchg set both IsStore and IsLoad to true.
   /// Backends with !getInsertFencesForAtomic() should keep a no-op here.
-  virtual void emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+  /// This function should either return a nullptr, or a pointer to an IR-level
+  /// Instruction*. Even complex fence sequences can be represented by a
+  /// single Instruction* through an intrinsic to be lowered later.
+  virtual Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
           bool IsStore, bool IsLoad) const {
     assert(!getInsertFencesForAtomic());
+    return nullptr;
   }

   /// Inserts in the IR a target-specific intrinsic specifying a fence.
@@ -978,9 +982,13 @@
   /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad.
   /// RMW and CmpXchg set both IsStore and IsLoad to true.
   /// Backends with !getInsertFencesForAtomic() should keep a no-op here.
-  virtual void emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+  /// This function should either return a nullptr, or a pointer to an IR-level
+  /// Instruction*. Even complex fence sequences can be represented by a
+  /// single Instruction* through an intrinsic to be lowered later.
+  virtual Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
           bool IsStore, bool IsLoad) const {
     assert(!getInsertFencesForAtomic());
+    return nullptr;
   }

   /// Returns true if the given (atomic) store should be expanded by the
Index: llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
+++ llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) ldrex/strex loops or AtomicCmpXchg.
+// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg.
 //
 //===----------------------------------------------------------------------===//
@@ -41,6 +41,8 @@
     bool runOnFunction(Function &F) override;

   private:
+    bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+                               bool IsStore, bool IsLoad);
     bool expandAtomicLoad(LoadInst *LI);
     bool expandAtomicStore(StoreInst *SI);
     bool expandAtomicRMW(AtomicRMWInst *AI);
@@ -80,10 +82,45 @@
     auto SI = dyn_cast<StoreInst>(I);
     auto RMWI = dyn_cast<AtomicRMWInst>(I);
     auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
-    assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) && "Unknown atomic instruction");

+    auto FenceOrdering = Monotonic;
+    bool IsStore, IsLoad;
+    if (TargetLowering->getInsertFencesForAtomic()) {
+      if (LI && isAtLeastAcquire(LI->getOrdering())) {
+        FenceOrdering = LI->getOrdering();
+        LI->setOrdering(Monotonic);
+        IsStore = false;
+        IsLoad = true;
+      } else if (SI && isAtLeastRelease(SI->getOrdering())) {
+        FenceOrdering = SI->getOrdering();
+        SI->setOrdering(Monotonic);
+        IsStore = true;
+        IsLoad = false;
+      } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) ||
+                          isAtLeastAcquire(RMWI->getOrdering()))) {
+        FenceOrdering = RMWI->getOrdering();
+        RMWI->setOrdering(Monotonic);
+        IsStore = IsLoad = true;
+      } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() &&
+                 (isAtLeastRelease(CASI->getSuccessOrdering()) ||
+                  isAtLeastAcquire(CASI->getSuccessOrdering()))) {
+        // If a compare and swap is lowered to LL/SC, we can do smarter fence
+        // insertion, with a stronger one on the success path than on the
+        // failure path. As a result, fence insertion is directly done by
+        // expandAtomicCmpXchg in that case.
+        FenceOrdering = CASI->getSuccessOrdering();
+        CASI->setSuccessOrdering(Monotonic);
+        CASI->setFailureOrdering(Monotonic);
+        IsStore = IsLoad = true;
+      }
+
+      if (FenceOrdering != Monotonic) {
+        MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
+      }
+    }
+
     if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
       MadeChange |= expandAtomicLoad(LI);
     } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
@@ -97,30 +134,40 @@
   return MadeChange;
 }
+bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+                                         bool IsStore, bool IsLoad) {
+  IRBuilder<> Builder(I);
+
+  auto LeadingFence =
+      TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence(
+          Builder, Order, IsStore, IsLoad);
+
+  auto TrailingFence =
+      TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence(
+          Builder, Order, IsStore, IsLoad);
+  // The trailing fence is emitted before the instruction instead of after
+  // because there is no easy way of setting Builder insertion point after
+  // an instruction. So we must erase it from the BB, and insert it back
+  // in the right place.
+  // We have a guard here because not every atomic operation generates a
+  // trailing fence.
+  if (TrailingFence) {
+    TrailingFence->removeFromParent();
+    TrailingFence->insertAfter(I);
+  }
+
+  return (LeadingFence || TrailingFence);
+}
+
 bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
   auto TLI = TM->getSubtargetImpl()->getTargetLowering();
-  // If getInsertFencesForAtomic() returns true, then the target does not want
-  // to deal with memory orders, and emitLeading/TrailingFence should take care
-  // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
-  // should preserve the ordering.
-  AtomicOrdering MemOpOrder =
-      TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
   IRBuilder<> Builder(LI);
-  // Note that although no fence is required before atomic load on ARM, it is
-  // required before SequentiallyConsistent loads for the recommended Power
-  // mapping (see http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html).
-  // So we let the target choose what to emit.
-  TLI->emitLeadingFence(Builder, LI->getOrdering(),
-                        /*IsStore=*/false, /*IsLoad=*/true);
-
-  // The only 64-bit load guaranteed to be single-copy atomic by ARM is
-  // an ldrexd (A3.5.3).
+  // On some architectures, load-linked instructions are atomic for larger
+  // sizes than normal loads. For example, the only 64-bit load guaranteed
+  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
   Value *Val =
-      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
-
-  TLI->emitTrailingFence(Builder, LI->getOrdering(),
-                         /*IsStore=*/false, /*IsLoad=*/true);
+      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
   LI->replaceAllUsesWith(Val);
   LI->eraseFromParent();
@@ -193,17 +240,11 @@
 bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
   auto TLI = TM->getSubtargetImpl()->getTargetLowering();
-  AtomicOrdering FenceOrder = AI->getOrdering();
+  AtomicOrdering MemOpOrder = AI->getOrdering();
   Value *Addr = AI->getPointerOperand();
   BasicBlock *BB = AI->getParent();
   Function *F = BB->getParent();
   LLVMContext &Ctx = F->getContext();
-  // If getInsertFencesForAtomic() returns true, then the target does not want
-  // to deal with memory orders, and emitLeading/TrailingFence should take care
-  // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
-  // should preserve the ordering.
-  AtomicOrdering MemOpOrder =
-      TLI->getInsertFencesForAtomic() ? Monotonic : FenceOrder;

   // Given: atomicrmw some_op iN* %addr, iN %incr ordering
   //
@@ -230,7 +271,6 @@
   // the branch entirely.
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
-  TLI->emitLeadingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);
   Builder.CreateBr(LoopBB);

   // Start the main loop block now that we've taken care of the preliminaries.
@@ -247,7 +287,6 @@
   Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-  TLI->emitTrailingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);

   AI->replaceAllUsesWith(Loaded);
   AI->eraseFromParent();
@@ -256,11 +295,8 @@
 }

 bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
-  auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
-  AtomicOrdering FenceOrder =
-      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
   AtomicOrdering MemOpOrder =
-      TargetLowering->getInsertFencesForAtomic() ? Monotonic : FenceOrder;
+      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
   Value *Addr = AI->getPointerOperand();
   BasicBlock *BB = AI->getParent();
   Function *F = BB->getParent();
@@ -292,8 +328,6 @@
   // the branch entirely.
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
-  TargetLowering->emitLeadingFence(Builder, FenceOrder,
-                                   /*IsStore=*/true, /*IsLoad=*/true);
   LoadInst *InitLoaded = Builder.CreateLoad(Addr);
   // Atomics require at least natural alignment.
   InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
@@ -317,8 +351,6 @@
   Builder.CreateCondBr(Success, ExitBB, LoopBB);

   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-  TargetLowering->emitTrailingFence(Builder, FenceOrder,
-                                    /*IsStore=*/true, /*IsLoad=*/true);

   AI->replaceAllUsesWith(NewLoaded);
   AI->eraseFromParent();
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h
@@ -399,9 +399,9 @@
     Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr,
                                 AtomicOrdering Ord) const override;

-    void emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+    Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                           bool IsStore, bool IsLoad) const override;
-    void emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+    Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                            bool IsStore, bool IsLoad) const override;

     bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
@@ -11024,11 +11024,11 @@
 }

 // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
-void ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                          AtomicOrdering Ord, bool IsStore,
                                          bool IsLoad) const {
   if (!getInsertFencesForAtomic())
-    return;
+    return nullptr;

   switch (Ord) {
   case NotAtomic:
@@ -11036,27 +11036,27 @@
     llvm_unreachable("Invalid fence: unordered/non-atomic");
   case Monotonic:
   case Acquire:
-    return; // Nothing to do
+    return nullptr; // Nothing to do
   case SequentiallyConsistent:
     if (!IsStore)
-      return; // Nothing to do
-    /*FALLTHROUGH*/
+      return nullptr; // Nothing to do
+      /*FALLTHROUGH*/
   case Release:
   case AcquireRelease:
     if (Subtarget->isSwift())
-      makeDMB(Builder, ARM_MB::ISHST);
+      return makeDMB(Builder, ARM_MB::ISHST);
     // FIXME: add a comment with a link to documentation justifying this.
     else
-      makeDMB(Builder, ARM_MB::ISH);
-    return;
+      return makeDMB(Builder, ARM_MB::ISH);
   }
+  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
 }

-void ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                           AtomicOrdering Ord, bool IsStore,
                                           bool IsLoad) const {
   if (!getInsertFencesForAtomic())
-    return;
+    return nullptr;

   switch (Ord) {
   case NotAtomic:
@@ -11064,13 +11064,13 @@
     llvm_unreachable("Invalid fence: unordered/not-atomic");
   case Monotonic:
   case Release:
-    return; // Nothing to do
+    return nullptr; // Nothing to do
   case Acquire:
   case AcquireRelease:
-  case SequentiallyConsistent:
-    makeDMB(Builder, ARM_MB::ISH);
-    return;
+  case SequentiallyConsistent:
+    return makeDMB(Builder, ARM_MB::ISH);
   }
+  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
 }

 // Loads and stores less than 64-bits are already atomic; ones above that
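
The sketch below is not part of the patch above; it illustrates what the new contract asks of a backend that enables setInsertFencesForAtomic(true). It assumes a hypothetical MyTargetLowering subclass (declared in the backend's own headers) and maps fences to plain IR fence instructions via IRBuilder::CreateFence; real targets, like the ARM implementation above, emit target-specific intrinsics (dmb) instead, and the ordering-to-fence mapping shown here is purely illustrative. Returning the created instruction is what lets AtomicExpandPass::bracketInstWithFences reposition the trailing fence after the atomic operation.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical hook implementations; MyTargetLowering is assumed to be a
// TargetLowering subclass with getInsertFencesForAtomic() returning true.
Instruction *MyTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                AtomicOrdering Ord,
                                                bool IsStore, bool IsLoad) const {
  if (!getInsertFencesForAtomic())
    return nullptr;
  // Nothing is needed before monotonic/acquire operations, nor before a
  // sequentially consistent load, under this illustrative mapping.
  if (Ord == Monotonic || Ord == Acquire ||
      (Ord == SequentiallyConsistent && !IsStore))
    return nullptr;
  // CreateFence returns the FenceInst it inserts, which satisfies the new
  // Instruction* contract.
  return Builder.CreateFence(Release);
}

Instruction *MyTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                 AtomicOrdering Ord,
                                                 bool IsStore, bool IsLoad) const {
  if (!getInsertFencesForAtomic())
    return nullptr;
  if (Ord == Monotonic || Ord == Release)
    return nullptr;
  // bracketInstWithFences moves this fence to just after the bracketed
  // atomic instruction.
  return Builder.CreateFence(Acquire);
}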