Index: llvm/trunk/include/llvm/CodeGen/AtomicExpandUtils.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/AtomicExpandUtils.h +++ llvm/trunk/include/llvm/CodeGen/AtomicExpandUtils.h @@ -0,0 +1,57 @@ +//===-- AtomicExpandUtils.h - Utilities for expanding atomic instructions -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/IRBuilder.h" + +namespace llvm { +class Value; +class AtomicRMWInst; + + +/// Parameters (see the expansion example below): +/// (the builder, %addr, %loaded, %new_val, ordering, +/// /* OUT */ %success, /* OUT */ %new_loaded) +typedef function_ref &, Value *, Value *, Value *, + AtomicOrdering, Value *&, Value *&)> CreateCmpXchgInstFun; + +/// \brief Expand an atomic RMW instruction into a loop utilizing +/// cmpxchg. You'll want to make sure your target machine likes cmpxchg +/// instructions in the first place and that there isn't another, better, +/// transformation available (for example AArch32/AArch64 have linked loads). +/// +/// This is useful in passes which can't rewrite the more exotic RMW +/// instructions directly into a platform specific intrinsics (because, say, +/// those intrinsics don't exist). If such a pass is able to expand cmpxchg +/// instructions directly however, then, with this function, it could avoid two +/// extra module passes (avoiding passes by `-atomic-expand` and itself). A +/// specific example would be PNaCl's `RewriteAtomics` pass. +/// +/// Given: atomicrmw some_op iN* %addr, iN %incr ordering +/// +/// The standard expansion we produce is: +/// [...] +/// %init_loaded = load atomic iN* %addr +/// br label %loop +/// loop: +/// %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] +/// %new = some_op iN %loaded, %incr +/// ; This is what -atomic-expand will produce using this function on i686 targets: +/// %pair = cmpxchg iN* %addr, iN %loaded, iN %new_val +/// %new_loaded = extractvalue { iN, i1 } %pair, 0 +/// %success = extractvalue { iN, i1 } %pair, 1 +/// ; End callback produced IR +/// br i1 %success, label %atomicrmw.end, label %loop +/// atomicrmw.end: +/// [...] +/// +/// Returns true if the containing function was modified. +bool +expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun Factory); +} Index: llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp =================================================================== --- llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp +++ llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/AtomicExpandUtils.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -50,7 +51,6 @@ bool expandAtomicStore(StoreInst *SI); bool tryExpandAtomicRMW(AtomicRMWInst *AI); bool expandAtomicRMWToLLSC(AtomicRMWInst *AI); - bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); bool isIdempotentRMW(AtomicRMWInst *AI); bool simplifyIdempotentRMW(AtomicRMWInst *AI); @@ -226,6 +226,17 @@ return tryExpandAtomicRMW(AI); } +static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, + Value *Loaded, Value *NewVal, + AtomicOrdering MemOpOrder, + Value *&Success, Value *&NewLoaded) { + Value* Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, MemOpOrder, + AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); + Success = Builder.CreateExtractValue(Pair, 1, "success"); + NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); +} + bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { switch (TLI->shouldExpandAtomicRMWInIR(AI)) { case TargetLoweringBase::AtomicRMWExpansionKind::None: @@ -239,7 +250,7 @@ return expandAtomicRMWToLLSC(AI); } case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: { - return expandAtomicRMWToCmpXchg(AI); + return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); } } llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); @@ -337,70 +348,6 @@ return true; } -bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { - AtomicOrdering MemOpOrder = - AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: atomicrmw some_op iN* %addr, iN %incr ordering - // - // The standard expansion we produce is: - // [...] - // %init_loaded = load atomic iN* %addr - // br label %loop - // loop: - // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] - // %new = some_op iN %loaded, %incr - // %pair = cmpxchg iN* %addr, iN %loaded, iN %new - // %new_loaded = extractvalue { iN, i1 } %pair, 0 - // %success = extractvalue { iN, i1 } %pair, 1 - // br i1 %success, label %atomicrmw.end, label %loop - // atomicrmw.end: - // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we want a load. It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - LoadInst *InitLoaded = Builder.CreateLoad(Addr); - // Atomics require at least natural alignment. - InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries. - Builder.SetInsertPoint(LoopBB); - PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); - Loaded->addIncoming(InitLoaded, BB); - - Value *NewVal = - performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); - - Value *Pair = Builder.CreateAtomicCmpXchg( - Addr, Loaded, NewVal, MemOpOrder, - AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); - Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); - Loaded->addIncoming(NewLoaded, LoopBB); - - Value *Success = Builder.CreateExtractValue(Pair, 1, "success"); - Builder.CreateCondBr(Success, ExitBB, LoopBB); - - Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - - AI->replaceAllUsesWith(NewLoaded); - AI->eraseFromParent(); - - return true; -} - bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); AtomicOrdering FailureOrder = CI->getFailureOrdering(); @@ -562,3 +509,72 @@ } return false; } + +bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, + CreateCmpXchgInstFun CreateCmpXchg) { + assert(AI); + + AtomicOrdering MemOpOrder = + AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // %init_loaded = load atomic iN* %addr + // br label %loop + // loop: + // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] + // %new = some_op iN %loaded, %incr + // %pair = cmpxchg iN* %addr, iN %loaded, iN %new + // %new_loaded = extractvalue { iN, i1 } %pair, 0 + // %success = extractvalue { iN, i1 } %pair, 1 + // br i1 %success, label %atomicrmw.end, label %loop + // atomicrmw.end: + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we want a load. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + LoadInst *InitLoaded = Builder.CreateLoad(Addr); + // Atomics require at least natural alignment. + InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); + Loaded->addIncoming(InitLoaded, BB); + + Value *NewVal = + performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + + Value *NewLoaded = nullptr; + Value *Success = nullptr; + + CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder, + Success, NewLoaded); + assert(Success && NewLoaded); + + Loaded->addIncoming(NewLoaded, LoopBB); + + Builder.CreateCondBr(Success, ExitBB, LoopBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + + AI->replaceAllUsesWith(NewLoaded); + AI->eraseFromParent(); + + return true; +}