diff --git a/llvm/include/llvm/CodeGen/AtomicExpandUtils.h b/llvm/include/llvm/CodeGen/AtomicExpandUtils.h --- a/llvm/include/llvm/CodeGen/AtomicExpandUtils.h +++ b/llvm/include/llvm/CodeGen/AtomicExpandUtils.h @@ -10,6 +10,7 @@ #define LLVM_CODEGEN_ATOMICEXPANDUTILS_H #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/AtomicOrdering.h" @@ -57,7 +58,9 @@ /// [...] /// /// Returns true if the containing function was modified. -bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg); +bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, + CreateCmpXchgInstFun CreateCmpXchg, + SmallVector &CmpXchgLoopBlocks); } // end namespace llvm diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -48,6 +48,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LowerAtomic.h" #include #include @@ -62,6 +63,7 @@ class AtomicExpand : public FunctionPass { const TargetLowering *TLI = nullptr; const DataLayout *DL = nullptr; + SmallVector CmpXchgLoopBlocks; public: static char ID; // Pass identification, replacement for typeid @@ -72,6 +74,10 @@ bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } + private: bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); @@ -104,7 +110,8 @@ IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, function_ref PerformOp, - CreateCmpXchgInstFun CreateCmpXchg); + CreateCmpXchgInstFun CreateCmpXchg, + SmallVector &CmpXchgLoopBlocks); bool 
tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); @@ -123,7 +130,8 @@ friend bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, - CreateCmpXchgInstFun CreateCmpXchg); + CreateCmpXchgInstFun CreateCmpXchg, + SmallVector &CmpXchgLoopBlocks); }; // IRBuilder to be used for replacement atomic instructions. @@ -142,9 +150,11 @@ char &llvm::AtomicExpandID = AtomicExpand::ID; -INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false, - false) - +INITIALIZE_PASS_BEGIN(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", + false, false) FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } // Helper functions to retrieve the size of atomic instructions. @@ -190,6 +200,7 @@ return false; TLI = Subtarget->getTargetLowering(); DL = &F.getParent()->getDataLayout(); + CmpXchgLoopBlocks.clear(); SmallVector AtomicInsts; @@ -337,6 +348,12 @@ } else if (CASI) MadeChange |= tryExpandAtomicCmpXchg(CASI); } + + // Run CFG Simplification on CAS loop blocks. 
+ auto TTI = &getAnalysis().getTTI(F); + for (BasicBlock *BB : CmpXchgLoopBlocks) + simplifyCFG(BB, *TTI); + return MadeChange; } @@ -602,7 +619,7 @@ << AI->getOperationName(AI->getOperation()) << " operation at " << MemScope << " memory scope"; }); - expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); + expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun, CmpXchgLoopBlocks); } return true; } @@ -873,7 +890,8 @@ if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) { OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment, MemOpOrder, SSID, - PerformPartwordOp, createCmpXchgInstFun); + PerformPartwordOp, createCmpXchgInstFun, + CmpXchgLoopBlocks); } else { assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC); OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr, @@ -1479,7 +1497,8 @@ IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, function_ref PerformOp, - CreateCmpXchgInstFun CreateCmpXchg) { + CreateCmpXchgInstFun CreateCmpXchg, + SmallVector &CmpXchgLoopBlocks) { LLVMContext &Ctx = Builder.getContext(); BasicBlock *BB = Builder.GetInsertBlock(); Function *F = BB->getParent(); @@ -1501,8 +1520,9 @@ // [...] BasicBlock *ExitBB = BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); + CmpXchgLoopBlocks.push_back(ExitBB); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - + CmpXchgLoopBlocks.push_back(LoopBB); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we want a load. It's easiest to just remove // the branch entirely. 
@@ -1559,8 +1579,9 @@ } // Note: This function is exposed externally by AtomicExpandUtils.h -bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, - CreateCmpXchgInstFun CreateCmpXchg) { +bool llvm::expandAtomicRMWToCmpXchg( + AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg, + SmallVector &CmpXchgLoopBlocks) { ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout()); Builder.setIsFPConstrained( AI->getFunction()->hasFnAttribute(Attribute::StrictFP)); @@ -1574,7 +1595,7 @@ return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded, AI->getValOperand()); }, - CreateCmpXchg); + CreateCmpXchg, CmpXchgLoopBlocks); AI->replaceAllUsesWith(Loaded); AI->eraseFromParent(); @@ -1722,9 +1743,10 @@ // CAS libcall, via a CAS loop, instead. if (!Success) { expandAtomicRMWToCmpXchg( - I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded, - Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder, - SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) { + I, + [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded, + Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder, + SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) { // Create the CAS instruction normally... AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg( Addr, Loaded, NewVal, Alignment, MemOpOrder, @@ -1734,7 +1756,8 @@ // ...and then expand the CAS into a libcall. 
expandAtomicCASToLibcall(Pair); - }); + }, + CmpXchgLoopBlocks); } } diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-simplify-cfg-CAS-block.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-simplify-cfg-CAS-block.ll --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-simplify-cfg-CAS-block.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-simplify-cfg-CAS-block.ll @@ -21,9 +21,7 @@ ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 ; GFX90A-NEXT: [[TMP4]] = bitcast i32 [[NEWLOADED]] to float -; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX90A: atomicrmw.end: -; GFX90A-NEXT: br label [[ENDIF:%.*]] +; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ENDIF:%.*]], label [[ATOMICRMW_START]] ; GFX90A: else: ; GFX90A-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(1) [[OUT]], align 4 ; GFX90A-NEXT: br label [[ATOMICRMW_START2:%.*]] @@ -36,11 +34,9 @@ ; GFX90A-NEXT: [[SUCCESS5:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 ; GFX90A-NEXT: [[NEWLOADED6:%.*]] = extractvalue { i32, i1 } [[TMP8]], 0 ; GFX90A-NEXT: [[TMP9]] = bitcast i32 [[NEWLOADED6]] to float -; GFX90A-NEXT: br i1 [[SUCCESS5]], label [[ATOMICRMW_END1:%.*]], label [[ATOMICRMW_START2]] -; GFX90A: atomicrmw.end1: -; GFX90A-NEXT: br label [[ENDIF]] +; GFX90A-NEXT: br i1 [[SUCCESS5]], label [[ENDIF]], label [[ATOMICRMW_START2]] ; GFX90A: endif: -; GFX90A-NEXT: [[COMBINE:%.*]] = phi float [ [[TMP4]], [[ATOMICRMW_END]] ], [ [[TMP9]], [[ATOMICRMW_END1]] ] +; GFX90A-NEXT: [[COMBINE:%.*]] = phi float [ [[TMP4]], [[ATOMICRMW_START]] ], [ [[TMP9]], [[ATOMICRMW_START2]] ] ; GFX90A-NEXT: store float [[COMBINE]], ptr addrspace(1) [[OUT]], align 4 ; GFX90A-NEXT: ret void ;