Index: llvm/include/llvm/Transforms/IPO/HotColdSplitting.h =================================================================== --- llvm/include/llvm/Transforms/IPO/HotColdSplitting.h +++ llvm/include/llvm/Transforms/IPO/HotColdSplitting.h @@ -23,6 +23,7 @@ class OptimizationRemarkEmitter; class AssumptionCache; class DominatorTree; +class CodeExtractorAnalysisCache; /// A sequence of basic blocks. /// @@ -43,8 +44,10 @@ bool isFunctionCold(const Function &F) const; bool shouldOutlineFrom(const Function &F) const; bool outlineColdRegions(Function &F, bool HasProfileSummary); - Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT, - BlockFrequencyInfo *BFI, TargetTransformInfo &TTI, + Function *extractColdRegion(const BlockSequence &Region, + const CodeExtractorAnalysisCache &CEAC, + DominatorTree &DT, BlockFrequencyInfo *BFI, + TargetTransformInfo &TTI, OptimizationRemarkEmitter &ORE, AssumptionCache *AC, unsigned Count); ProfileSummaryInfo *PSI; Index: llvm/include/llvm/Transforms/Utils/CodeExtractor.h =================================================================== --- llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -22,6 +22,7 @@ namespace llvm { +class AllocaInst; class BasicBlock; class BlockFrequency; class BlockFrequencyInfo; @@ -36,6 +37,37 @@ class Type; class Value; +/// A cache for the CodeExtractor analysis. The operation \ref +/// CodeExtractor::extractCodeRegion is guaranteed not to invalidate this +/// object. This object should be conservatively be considered invalid if any +/// other mutating operations on the IR occur. +class CodeExtractorAnalysisCache { + /// The allocas in the function. + SmallVector Allocas; + + /// Base memory addresses of load/store instructions, grouped by block. + DenseMap> BaseMemAddrs; + + /// Blocks which contain instructions which may have unknown side-effects + /// on memory. + DenseSet SideEffectingBlocks; + + void findSideEffectInfoForBlock(BasicBlock &BB); + +public: + /// Constructing this object is O(n) in the size of the function. + CodeExtractorAnalysisCache(Function &F); + + /// Get the allocas in the function at the time the analysis was created. + /// Note that some of these allocas may no longer be present in the function, + /// due to \ref CodeExtractor::extractCodeRegion. + ArrayRef getAllocas() const { return Allocas; } + + /// Check whether \p BB contains an instruction thought to load from, store + /// to, or otherwise clobber the alloca \p Addr. + bool doesBlockContainClobberOfAddr(BasicBlock &BB, AllocaInst *Addr) const; +}; + /// Utility class for extracting code into a new function. /// /// This utility provides a simple interface for extracting some sequence of @@ -104,7 +136,7 @@ /// /// Returns zero when called on a CodeExtractor instance where isEligible /// returns false. - Function *extractCodeRegion(); + Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC); /// Verify that assumption cache isn't stale after a region is extracted. /// Returns false when verifier finds errors. AssumptionCache is passed as @@ -135,7 +167,9 @@ /// region. /// /// Returns true if it is safe to do the code motion. - bool isLegalToShrinkwrapLifetimeMarkers(Instruction *AllocaAddr) const; + bool + isLegalToShrinkwrapLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, + Instruction *AllocaAddr) const; /// Find the set of allocas whose life ranges are contained within the /// outlined region. @@ -145,7 +179,8 @@ /// are used by the lifetime markers are also candidates for shrink- /// wrapping. The instructions that need to be sunk are collected in /// 'Allocas'. - void findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, + void findAllocas(const CodeExtractorAnalysisCache &CEAC, + ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const; /// Find or create a block within the outline region for placing hoisted @@ -166,8 +201,9 @@ Instruction *LifeEnd = nullptr; }; - LifetimeMarkerInfo getLifetimeMarkers(Instruction *Addr, - BasicBlock *ExitBlock) const; + LifetimeMarkerInfo + getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, + Instruction *Addr, BasicBlock *ExitBlock) const; void severSplitPHINodesOfEntry(BasicBlock *&Header); void severSplitPHINodesOfExits(const SmallPtrSetImpl &Exits); Index: llvm/lib/Transforms/IPO/BlockExtractor.cpp =================================================================== --- llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -206,7 +206,8 @@ ++NumExtracted; Changed = true; } - Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(); + CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); + Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() << "' in: " << F->getName() << '\n'); Index: llvm/lib/Transforms/IPO/HotColdSplitting.cpp =================================================================== --- llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -290,13 +290,10 @@ return Penalty; } -Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region, - DominatorTree &DT, - BlockFrequencyInfo *BFI, - TargetTransformInfo &TTI, - OptimizationRemarkEmitter &ORE, - AssumptionCache *AC, - unsigned Count) { +Function *HotColdSplitting::extractColdRegion( + const BlockSequence &Region, const CodeExtractorAnalysisCache &CEAC, + DominatorTree &DT, BlockFrequencyInfo *BFI, TargetTransformInfo &TTI, + OptimizationRemarkEmitter &ORE, AssumptionCache *AC, unsigned Count) { assert(!Region.empty()); // TODO: Pass BFI and BPI to update profile information. @@ -318,7 +315,7 @@ return nullptr; Function *OrigF = Region[0]->getParent(); - if (Function *OutF = CE.extractCodeRegion()) { + if (Function *OutF = CE.extractCodeRegion(CEAC)) { User *U = *OutF->user_begin(); CallInst *CI = cast(U); CallSite CS(CI); @@ -606,9 +603,14 @@ } } + if (OutliningWorklist.empty()) + return Changed; + // Outline single-entry cold regions, splitting up larger regions as needed. unsigned OutlinedFunctionID = 1; - while (!OutliningWorklist.empty()) { + // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time. + CodeExtractorAnalysisCache CEAC(F); + do { OutliningRegion Region = OutliningWorklist.pop_back_val(); assert(!Region.empty() && "Empty outlining region in worklist"); do { @@ -619,14 +621,14 @@ BB->dump(); }); - Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC, - OutlinedFunctionID); + Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI, TTI, + ORE, AC, OutlinedFunctionID); if (Outlined) { ++OutlinedFunctionID; Changed = true; } } while (!Region.empty()); - } + } while (!OutliningWorklist.empty()); return Changed; } Index: llvm/lib/Transforms/IPO/LoopExtractor.cpp =================================================================== --- llvm/lib/Transforms/IPO/LoopExtractor.cpp +++ llvm/lib/Transforms/IPO/LoopExtractor.cpp @@ -141,10 +141,12 @@ if (NumLoops == 0) return Changed; --NumLoops; AssumptionCache *AC = nullptr; + Function &Func = *L->getHeader()->getParent(); if (auto *ACT = getAnalysisIfAvailable()) - AC = ACT->lookupAssumptionCache(*L->getHeader()->getParent()); + AC = ACT->lookupAssumptionCache(Func); + CodeExtractorAnalysisCache CEAC(Func); CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC); - if (Extractor.extractCodeRegion() != nullptr) { + if (Extractor.extractCodeRegion(CEAC) != nullptr) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. Index: llvm/lib/Transforms/IPO/PartialInlining.cpp =================================================================== --- llvm/lib/Transforms/IPO/PartialInlining.cpp +++ llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -1122,6 +1122,9 @@ BranchProbabilityInfo BPI(*ClonedFunc, LI); ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); + // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time. + CodeExtractorAnalysisCache CEAC(*ClonedFunc); + SetVector Inputs, Outputs, Sinks; for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo : ClonedOMRI->ORI) { @@ -1148,7 +1151,7 @@ if (Outputs.size() > 0 && !ForceLiveExit) continue; - Function *OutlinedFunc = CE.extractCodeRegion(); + Function *OutlinedFunc = CE.extractCodeRegion(CEAC); if (OutlinedFunc) { CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc); @@ -1210,11 +1213,12 @@ } // Extract the body of the if. + CodeExtractorAnalysisCache CEAC(*ClonedFunc); Function *OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc), /* AllowVarargs */ true) - .extractCodeRegion(); + .extractCodeRegion(CEAC); if (OutlinedFunc) { BasicBlock *OutliningCallBB = Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -305,52 +305,79 @@ return CommonExitBlock; } +CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function &F) { + for (BasicBlock &BB : F) { + for (Instruction &II : BB.instructionsWithoutDebug()) + if (auto *AI = dyn_cast(&II)) + Allocas.push_back(AI); + + findSideEffectInfoForBlock(BB); + } +} + +void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) { + for (Instruction &II : BB.instructionsWithoutDebug()) { + unsigned Opcode = II.getOpcode(); + Value *MemAddr = nullptr; + switch (Opcode) { + case Instruction::Store: + case Instruction::Load: { + if (Opcode == Instruction::Store) { + StoreInst *SI = cast(&II); + MemAddr = SI->getPointerOperand(); + } else { + LoadInst *LI = cast(&II); + MemAddr = LI->getPointerOperand(); + } + // Global variable can not be aliased with locals. + if (dyn_cast(MemAddr)) + break; + Value *Base = MemAddr->stripInBoundsConstantOffsets(); + if (!isa(Base)) { + SideEffectingBlocks.insert(&BB); + return; + } + BaseMemAddrs[&BB].insert(Base); + break; + } + default: { + IntrinsicInst *IntrInst = dyn_cast(&II); + if (IntrInst) { + if (IntrInst->isLifetimeStartOrEnd()) + break; + SideEffectingBlocks.insert(&BB); + return; + } + // Treat all the other cases conservatively if it has side effects. + if (II.mayHaveSideEffects()) { + SideEffectingBlocks.insert(&BB); + return; + } + } + } + } +} + +bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr( + BasicBlock &BB, AllocaInst *Addr) const { + if (SideEffectingBlocks.count(&BB)) + return true; + auto It = BaseMemAddrs.find(&BB); + if (It != BaseMemAddrs.end()) + return It->second.count(Addr); + return false; +} + bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers( - Instruction *Addr) const { + const CodeExtractorAnalysisCache &CEAC, Instruction *Addr) const { AllocaInst *AI = cast(Addr->stripInBoundsConstantOffsets()); Function *Func = (*Blocks.begin())->getParent(); for (BasicBlock &BB : *Func) { if (Blocks.count(&BB)) continue; - for (Instruction &II : BB) { - if (isa(II)) - continue; - - unsigned Opcode = II.getOpcode(); - Value *MemAddr = nullptr; - switch (Opcode) { - case Instruction::Store: - case Instruction::Load: { - if (Opcode == Instruction::Store) { - StoreInst *SI = cast(&II); - MemAddr = SI->getPointerOperand(); - } else { - LoadInst *LI = cast(&II); - MemAddr = LI->getPointerOperand(); - } - // Global variable can not be aliased with locals. - if (dyn_cast(MemAddr)) - break; - Value *Base = MemAddr->stripInBoundsConstantOffsets(); - if (!isa(Base) || Base == AI) - return false; - break; - } - default: { - IntrinsicInst *IntrInst = dyn_cast(&II); - if (IntrInst) { - if (IntrInst->isLifetimeStartOrEnd()) - break; - return false; - } - // Treat all the other cases conservatively if it has side effects. - if (II.mayHaveSideEffects()) - return false; - } - } - } + if (CEAC.doesBlockContainClobberOfAddr(BB, AI)) + return false; } - return true; } @@ -413,7 +440,8 @@ // outline region. If there are not other untracked uses of the address, return // the pair of markers if found; otherwise return a pair of nullptr. CodeExtractor::LifetimeMarkerInfo -CodeExtractor::getLifetimeMarkers(Instruction *Addr, +CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, + Instruction *Addr, BasicBlock *ExitBlock) const { LifetimeMarkerInfo Info; @@ -445,7 +473,7 @@ Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd); // Do legality check. if ((Info.SinkLifeStart || Info.HoistLifeEnd) && - !isLegalToShrinkwrapLifetimeMarkers(Addr)) + !isLegalToShrinkwrapLifetimeMarkers(CEAC, Addr)) return {}; // Check to see if we have a place to do hoisting, if not, bail. @@ -455,7 +483,8 @@ return Info; } -void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, +void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC, + ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const { Function *Func = (*Blocks.begin())->getParent(); ExitBlock = getCommonExitBlock(Blocks); @@ -476,61 +505,65 @@ return true; }; - for (BasicBlock &BB : *Func) { - if (Blocks.count(&BB)) + // Look up allocas in the original function in CodeExtractorAnalysisCache, as + // this is much faster than walking all the instructions. + for (AllocaInst *AI : CEAC.getAllocas()) { + BasicBlock *BB = AI->getParent(); + if (Blocks.count(BB)) continue; - for (Instruction &II : BB) { - auto *AI = dyn_cast(&II); - if (!AI) - continue; - LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(AI, ExitBlock); - bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo); - if (Moved) { - LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n"); - SinkCands.insert(AI); - continue; - } - - // Follow any bitcasts. - SmallVector Bitcasts; - SmallVector BitcastLifetimeInfo; - for (User *U : AI->users()) { - if (U->stripInBoundsConstantOffsets() == AI) { - Instruction *Bitcast = cast(U); - LifetimeMarkerInfo LMI = getLifetimeMarkers(Bitcast, ExitBlock); - if (LMI.LifeStart) { - Bitcasts.push_back(Bitcast); - BitcastLifetimeInfo.push_back(LMI); - continue; - } - } - - // Found unknown use of AI. - if (!definedInRegion(Blocks, U)) { - Bitcasts.clear(); - break; - } - } - - // Either no bitcasts reference the alloca or there are unknown uses. - if (Bitcasts.empty()) - continue; + // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca, + // check whether it is actually still in the original function. + Function *AIFunc = BB->getParent(); + if (AIFunc != Func) + continue; - LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n"); + LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock); + bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo); + if (Moved) { + LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n"); SinkCands.insert(AI); - for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) { - Instruction *BitcastAddr = Bitcasts[I]; - const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I]; - assert(LMI.LifeStart && - "Unsafe to sink bitcast without lifetime markers"); - moveOrIgnoreLifetimeMarkers(LMI); - if (!definedInRegion(Blocks, BitcastAddr)) { - LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr - << "\n"); - SinkCands.insert(BitcastAddr); + continue; + } + + // Follow any bitcasts. + SmallVector Bitcasts; + SmallVector BitcastLifetimeInfo; + for (User *U : AI->users()) { + if (U->stripInBoundsConstantOffsets() == AI) { + Instruction *Bitcast = cast(U); + LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock); + if (LMI.LifeStart) { + Bitcasts.push_back(Bitcast); + BitcastLifetimeInfo.push_back(LMI); + continue; } } + + // Found unknown use of AI. + if (!definedInRegion(Blocks, U)) { + Bitcasts.clear(); + break; + } + } + + // Either no bitcasts reference the alloca or there are unknown uses. + if (Bitcasts.empty()) + continue; + + LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n"); + SinkCands.insert(AI); + for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) { + Instruction *BitcastAddr = Bitcasts[I]; + const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I]; + assert(LMI.LifeStart && + "Unsafe to sink bitcast without lifetime markers"); + moveOrIgnoreLifetimeMarkers(LMI); + if (!definedInRegion(Blocks, BitcastAddr)) { + LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr + << "\n"); + SinkCands.insert(BitcastAddr); + } } } } @@ -1349,7 +1382,8 @@ MDBuilder(TI->getContext()).createBranchWeights(BranchWeights)); } -Function *CodeExtractor::extractCodeRegion() { +Function * +CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) { if (!isEligible()) return nullptr; @@ -1435,7 +1469,7 @@ ValueSet inputs, outputs, SinkingCands, HoistingCands; BasicBlock *CommonExit = nullptr; - findAllocas(SinkingCands, HoistingCands, CommonExit); + findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); assert(HoistingCands.empty() || CommonExit); // Find inputs to, outputs from the code region. Index: llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp =================================================================== --- llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp +++ llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -62,7 +62,8 @@ CodeExtractor CE(Candidates); EXPECT_TRUE(CE.isEligible()); - Function *Outlined = CE.extractCodeRegion(); + CodeExtractorAnalysisCache CEAC(*Func); + Function *Outlined = CE.extractCodeRegion(CEAC); EXPECT_TRUE(Outlined); BasicBlock *Exit = getBlockByName(Func, "notExtracted"); BasicBlock *ExitSplit = getBlockByName(Outlined, "notExtracted.split"); @@ -112,7 +113,8 @@ CodeExtractor CE(ExtractedBlocks); EXPECT_TRUE(CE.isEligible()); - Function *Outlined = CE.extractCodeRegion(); + CodeExtractorAnalysisCache CEAC(*Func); + Function *Outlined = CE.extractCodeRegion(CEAC); EXPECT_TRUE(Outlined); BasicBlock *Exit1 = getBlockByName(Func, "exit1"); BasicBlock *Exit2 = getBlockByName(Func, "exit2"); @@ -186,7 +188,8 @@ CodeExtractor CE(ExtractedBlocks); EXPECT_TRUE(CE.isEligible()); - Function *Outlined = CE.extractCodeRegion(); + CodeExtractorAnalysisCache CEAC(*Func); + Function *Outlined = CE.extractCodeRegion(CEAC); EXPECT_TRUE(Outlined); EXPECT_FALSE(verifyFunction(*Outlined, &errs())); EXPECT_FALSE(verifyFunction(*Func, &errs())); @@ -220,7 +223,8 @@ CodeExtractor CE(Blocks); EXPECT_TRUE(CE.isEligible()); - Function *Outlined = CE.extractCodeRegion(); + CodeExtractorAnalysisCache CEAC(*Func); + Function *Outlined = CE.extractCodeRegion(CEAC); EXPECT_TRUE(Outlined); EXPECT_FALSE(verifyFunction(*Outlined)); EXPECT_FALSE(verifyFunction(*Func)); @@ -271,7 +275,8 @@ CodeExtractor CE(Blocks, nullptr, false, nullptr, nullptr, &AC); EXPECT_TRUE(CE.isEligible()); - Function *Outlined = CE.extractCodeRegion(); + CodeExtractorAnalysisCache CEAC(*Func); + Function *Outlined = CE.extractCodeRegion(CEAC); EXPECT_TRUE(Outlined); EXPECT_FALSE(verifyFunction(*Outlined)); EXPECT_FALSE(verifyFunction(*Func));