AtomicExpand: run simplifyCFG over the blocks created for cmpxchg loops

insertRMWCmpXchgLoop records the loop and exit blocks it creates in
CmpXchgLoopBlocks; AtomicExpand::runOnFunction then calls simplifyCFG on each
recorded block once all atomics in the function have been expanded.

Review notes for this revision of the patch:
  * runOnFunction now calls CmpXchgLoopBlocks.clear() after the simplifyCFG
    loop. The vector is a member of the pass and nothing else in the patch
    empties it, so without the clear() a later runOnFunction call would walk
    BasicBlock pointers that were recorded for an earlier function.
  * The result of simplifyCFG is OR-ed into MadeChange instead of dropped.
  * NOTE(review): simplifyCFG is documented as possibly deleting the block it
    is given. Whether a block still queued in CmpXchgLoopBlocks can be freed
    by an earlier iteration (in particular when no DomTreeUpdater is passed)
    has not been checked here -- confirm before landing.
  * NOTE(review): DominatorTreeWrapperPass is declared with addRequired but
    fetched with getAnalysisIfAvailable; one of the two is presumably
    unintended.
  * NOTE(review): AtomicExpandUtils.h gains TargetTransformInfo.h although
    the changed declaration only names SmallVector and BasicBlock; consider
    moving that include into AtomicExpandPass.cpp.
  * NOTE(review): this text was restored from a copy that had lost its line
    breaks, indentation and everything inside <...>. The template arguments
    (SmallVector<BasicBlock *>, the function_ref signature, the analysis
    wrapper types), the <cassert>/<cstdint> includes and the blank context
    lines are reconstructed -- verify against the tree before applying.

diff --git a/llvm/include/llvm/CodeGen/AtomicExpandUtils.h b/llvm/include/llvm/CodeGen/AtomicExpandUtils.h
--- a/llvm/include/llvm/CodeGen/AtomicExpandUtils.h
+++ b/llvm/include/llvm/CodeGen/AtomicExpandUtils.h
@@ -10,6 +10,7 @@
 #define LLVM_CODEGEN_ATOMICEXPANDUTILS_H
 
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/Support/AtomicOrdering.h"
 
@@ -57,7 +58,9 @@
 ///     [...]
 ///
 /// Returns true if the containing function was modified.
-bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg);
+bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+                              CreateCmpXchgInstFun CreateCmpXchg,
+                              SmallVector<BasicBlock *> &CmpXchgLoopBlocks);
 
 } // end namespace llvm
 
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/InstSimplifyFolder.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/AtomicExpandUtils.h"
@@ -48,6 +49,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LowerAtomic.h"
 #include <cassert>
 #include <cstdint>
@@ -62,6 +64,7 @@
 class AtomicExpand : public FunctionPass {
   const TargetLowering *TLI = nullptr;
   const DataLayout *DL = nullptr;
+  SmallVector<BasicBlock *> CmpXchgLoopBlocks;
 
 public:
   static char ID; // Pass identification, replacement for typeid
@@ -72,6 +75,11 @@
 
   bool runOnFunction(Function &F) override;
 
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+  }
+
 private:
   bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
   IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
@@ -104,7 +112,8 @@
       IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
       AtomicOrdering MemOpOrder, SyncScope::ID SSID,
       function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
-      CreateCmpXchgInstFun CreateCmpXchg);
+      CreateCmpXchgInstFun CreateCmpXchg,
+      SmallVector<BasicBlock *> &CmpXchgLoopBlocks);
   bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
 
   bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
@@ -123,7 +132,8 @@
 
   friend bool
   llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
-                                 CreateCmpXchgInstFun CreateCmpXchg);
+                                 CreateCmpXchgInstFun CreateCmpXchg,
+                                 SmallVector<BasicBlock *> &CmpXchgLoopBlocks);
 };
 
 // IRBuilder to be used for replacement atomic instructions.
@@ -142,9 +152,12 @@
 
 char &llvm::AtomicExpandID = AtomicExpand::ID;
 
-INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false,
-                false)
-
+INITIALIZE_PASS_BEGIN(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
+                    false, false)
 FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
 
 // Helper functions to retrieve the size of atomic instructions.
@@ -337,6 +350,24 @@
     } else if (CASI)
       MadeChange |= tryExpandAtomicCmpXchg(CASI);
   }
+
+  DominatorTreeWrapperPass *const DTW =
+      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  DomTreeUpdater DTU(DTW ? &DTW->getDomTree() : nullptr,
+                     DomTreeUpdater::UpdateStrategy::Lazy);
+  auto TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  for (BasicBlock *BB : CmpXchgLoopBlocks) {
+    // simplifyCFG returns true when it modified the CFG; keep that in the
+    // pass result instead of dropping it.
+    MadeChange |=
+        simplifyCFG(BB, *TTI, RequireAndPreserveDomTree ? &DTU : nullptr,
+                    SimplifyCFGOptions().bonusInstThreshold(2));
+  }
+  // CmpXchgLoopBlocks is a member of the pass, so empty it before returning;
+  // otherwise the next runOnFunction call would revisit BasicBlock pointers
+  // that were recorded for this function.
+  CmpXchgLoopBlocks.clear();
+
   return MadeChange;
 }
 
@@ -604,7 +635,7 @@
                << AI->getOperationName(AI->getOperation()) << " operation at "
                << MemScope << " memory scope";
       });
-      expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+      expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun, CmpXchgLoopBlocks);
     }
     return true;
   }
@@ -880,7 +911,8 @@
   if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
     OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                      PMV.AlignedAddrAlignment, MemOpOrder, SSID,
-                                     PerformPartwordOp, createCmpXchgInstFun);
+                                     PerformPartwordOp, createCmpXchgInstFun,
+                                     CmpXchgLoopBlocks);
   } else {
     assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
     OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
@@ -1486,7 +1518,8 @@
     IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
     AtomicOrdering MemOpOrder, SyncScope::ID SSID,
     function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
-    CreateCmpXchgInstFun CreateCmpXchg) {
+    CreateCmpXchgInstFun CreateCmpXchg,
+    SmallVector<BasicBlock *> &CmpXchgLoopBlocks) {
   LLVMContext &Ctx = Builder.getContext();
   BasicBlock *BB = Builder.GetInsertBlock();
   Function *F = BB->getParent();
@@ -1508,8 +1541,9 @@
   //     [...]
   BasicBlock *ExitBB =
       BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
+  CmpXchgLoopBlocks.push_back(ExitBB);
   BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
-
+  CmpXchgLoopBlocks.push_back(LoopBB);
   // The split call above "helpfully" added a branch at the end of BB (to the
   // wrong place), but we want a load. It's easiest to just remove
   // the branch entirely.
@@ -1566,8 +1600,9 @@
 }
 
 // Note: This function is exposed externally by AtomicExpandUtils.h
-bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
-                                    CreateCmpXchgInstFun CreateCmpXchg) {
+bool llvm::expandAtomicRMWToCmpXchg(
+    AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg,
+    SmallVector<BasicBlock *> &CmpXchgLoopBlocks) {
   ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
   Builder.setIsFPConstrained(
       AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
@@ -1581,7 +1616,7 @@
         return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                    AI->getValOperand());
       },
-      CreateCmpXchg);
+      CreateCmpXchg, CmpXchgLoopBlocks);
 
   AI->replaceAllUsesWith(Loaded);
   AI->eraseFromParent();
@@ -1729,9 +1764,10 @@
   // CAS libcall, via a CAS loop, instead.
   if (!Success) {
     expandAtomicRMWToCmpXchg(
-        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
-                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
-                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
+        I,
+        [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
+               Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
+               SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
           // Create the CAS instruction normally...
           AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
               Addr, Loaded, NewVal, Alignment, MemOpOrder,
@@ -1741,7 +1777,8 @@
 
           // ...and then expand the CAS into a libcall.
           expandAtomicCASToLibcall(Pair);
-        });
+        },
+        CmpXchgLoopBlocks);
   }
 }
 