Skip to content

Commit 9852699

Browse files
committedOct 8, 2019
[CodeExtractor] Factor out and reuse shrinkwrap analysis
Factor out CodeExtractor's analysis of allocas (for shrinkwrapping purposes), and allow the analysis to be reused. This resolves a quadratic compile-time bug observed when compiling AMDGPUDisassembler.cpp.o. Pre-patch (Release + LTO clang): ``` ---User Time--- --System Time-- --User+System-- ---Wall Time--- --- Name --- 176.5278 ( 57.8%) 0.4915 ( 18.5%) 177.0192 ( 57.4%) 177.4112 ( 57.3%) Hot Cold Splitting ``` Post-patch (ReleaseAsserts clang): ``` ---User Time--- --System Time-- --User+System-- ---Wall Time--- --- Name --- 1.4051 ( 3.3%) 0.0079 ( 0.3%) 1.4129 ( 3.2%) 1.4129 ( 3.2%) Hot Cold Splitting ``` Testing: check-llvm, and comparing the AMDGPUDisassembler.cpp.o binary pre- vs. post-patch. An alternate approach is to hide CodeExtractorAnalysisCache from clients of CodeExtractor, and to recompute the analysis from scratch inside of CodeExtractor::extractCodeRegion(). This eliminates some redundant work in the shrinkwrapping legality check. However, some clients continue to exhibit O(n^2) compile time behavior as computing the analysis is O(n). rdar://55912966 Differential Revision: https://reviews.llvm.org/D68616 llvm-svn: 374089
1 parent d8245e7 commit 9852699

File tree

8 files changed

+206
-118
lines changed

8 files changed

+206
-118
lines changed
 

‎llvm/include/llvm/Transforms/IPO/HotColdSplitting.h

+5-2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class TargetTransformInfo;
2323
class OptimizationRemarkEmitter;
2424
class AssumptionCache;
2525
class DominatorTree;
26+
class CodeExtractorAnalysisCache;
2627

2728
/// A sequence of basic blocks.
2829
///
@@ -43,8 +44,10 @@ class HotColdSplitting {
4344
bool isFunctionCold(const Function &F) const;
4445
bool shouldOutlineFrom(const Function &F) const;
4546
bool outlineColdRegions(Function &F, bool HasProfileSummary);
46-
Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
47-
BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
47+
Function *extractColdRegion(const BlockSequence &Region,
48+
const CodeExtractorAnalysisCache &CEAC,
49+
DominatorTree &DT, BlockFrequencyInfo *BFI,
50+
TargetTransformInfo &TTI,
4851
OptimizationRemarkEmitter &ORE,
4952
AssumptionCache *AC, unsigned Count);
5053
ProfileSummaryInfo *PSI;

‎llvm/include/llvm/Transforms/Utils/CodeExtractor.h

+42-5
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
namespace llvm {
2424

25+
class AllocaInst;
2526
class BasicBlock;
2627
class BlockFrequency;
2728
class BlockFrequencyInfo;
@@ -36,6 +37,38 @@ class Module;
3637
class Type;
3738
class Value;
3839

40+
/// A cache for the CodeExtractor analysis. The operation \ref
41+
/// CodeExtractor::extractCodeRegion is guaranteed not to invalidate this
42+
/// object. This object should conservatively be considered invalid if any
43+
/// other mutating operations on the IR occur.
44+
///
45+
/// Constructing this object is O(n) in the size of the function.
46+
class CodeExtractorAnalysisCache {
47+
/// The allocas in the function.
48+
SmallVector<AllocaInst *, 16> Allocas;
49+
50+
/// Base memory addresses of load/store instructions, grouped by block.
51+
DenseMap<BasicBlock *, DenseSet<Value *>> BaseMemAddrs;
52+
53+
/// Blocks which contain instructions which may have unknown side-effects
54+
/// on memory.
55+
DenseSet<BasicBlock *> SideEffectingBlocks;
56+
57+
void findSideEffectInfoForBlock(BasicBlock &BB);
58+
59+
public:
60+
CodeExtractorAnalysisCache(Function &F);
61+
62+
/// Get the allocas in the function at the time the analysis was created.
63+
/// Note that some of these allocas may no longer be present in the function,
64+
/// due to \ref CodeExtractor::extractCodeRegion.
65+
ArrayRef<AllocaInst *> getAllocas() const { return Allocas; }
66+
67+
/// Check whether \p BB contains an instruction thought to load from, store
68+
/// to, or otherwise clobber the alloca \p Addr.
69+
bool doesBlockContainClobberOfAddr(BasicBlock &BB, AllocaInst *Addr) const;
70+
};
71+
3972
/// Utility class for extracting code into a new function.
4073
///
4174
/// This utility provides a simple interface for extracting some sequence of
@@ -104,7 +137,7 @@ class Value;
104137
///
105138
/// Returns zero when called on a CodeExtractor instance where isEligible
106139
/// returns false.
107-
Function *extractCodeRegion();
140+
Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC);
108141

109142
/// Verify that assumption cache isn't stale after a region is extracted.
110143
/// Returns false when verifier finds errors. AssumptionCache is passed as
@@ -135,7 +168,9 @@ class Value;
135168
/// region.
136169
///
137170
/// Returns true if it is safe to do the code motion.
138-
bool isLegalToShrinkwrapLifetimeMarkers(Instruction *AllocaAddr) const;
171+
bool
172+
isLegalToShrinkwrapLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
173+
Instruction *AllocaAddr) const;
139174

140175
/// Find the set of allocas whose life ranges are contained within the
141176
/// outlined region.
@@ -145,7 +180,8 @@ class Value;
145180
/// are used by the lifetime markers are also candidates for shrink-
146181
/// wrapping. The instructions that need to be sunk are collected in
147182
/// 'Allocas'.
148-
void findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
183+
void findAllocas(const CodeExtractorAnalysisCache &CEAC,
184+
ValueSet &SinkCands, ValueSet &HoistCands,
149185
BasicBlock *&ExitBlock) const;
150186

151187
/// Find or create a block within the outline region for placing hoisted
@@ -166,8 +202,9 @@ class Value;
166202
Instruction *LifeEnd = nullptr;
167203
};
168204

169-
LifetimeMarkerInfo getLifetimeMarkers(Instruction *Addr,
170-
BasicBlock *ExitBlock) const;
205+
LifetimeMarkerInfo
206+
getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
207+
Instruction *Addr, BasicBlock *ExitBlock) const;
171208

172209
void severSplitPHINodesOfEntry(BasicBlock *&Header);
173210
void severSplitPHINodesOfExits(const SmallPtrSetImpl<BasicBlock *> &Exits);

‎llvm/lib/Transforms/IPO/BlockExtractor.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ bool BlockExtractor::runOnModule(Module &M) {
206206
++NumExtracted;
207207
Changed = true;
208208
}
209-
Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion();
209+
CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent());
210+
Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC);
210211
if (F)
211212
LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName()
212213
<< "' in: " << F->getName() << '\n');

‎llvm/lib/Transforms/IPO/HotColdSplitting.cpp

+14-12
Original file line numberDiff line numberDiff line change
@@ -290,13 +290,10 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
290290
return Penalty;
291291
}
292292

293-
Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
294-
DominatorTree &DT,
295-
BlockFrequencyInfo *BFI,
296-
TargetTransformInfo &TTI,
297-
OptimizationRemarkEmitter &ORE,
298-
AssumptionCache *AC,
299-
unsigned Count) {
293+
Function *HotColdSplitting::extractColdRegion(
294+
const BlockSequence &Region, const CodeExtractorAnalysisCache &CEAC,
295+
DominatorTree &DT, BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
296+
OptimizationRemarkEmitter &ORE, AssumptionCache *AC, unsigned Count) {
300297
assert(!Region.empty());
301298

302299
// TODO: Pass BFI and BPI to update profile information.
@@ -318,7 +315,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
318315
return nullptr;
319316

320317
Function *OrigF = Region[0]->getParent();
321-
if (Function *OutF = CE.extractCodeRegion()) {
318+
if (Function *OutF = CE.extractCodeRegion(CEAC)) {
322319
User *U = *OutF->user_begin();
323320
CallInst *CI = cast<CallInst>(U);
324321
CallSite CS(CI);
@@ -606,9 +603,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
606603
}
607604
}
608605

606+
if (OutliningWorklist.empty())
607+
return Changed;
608+
609609
// Outline single-entry cold regions, splitting up larger regions as needed.
610610
unsigned OutlinedFunctionID = 1;
611-
while (!OutliningWorklist.empty()) {
611+
// Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
612+
CodeExtractorAnalysisCache CEAC(F);
613+
do {
612614
OutliningRegion Region = OutliningWorklist.pop_back_val();
613615
assert(!Region.empty() && "Empty outlining region in worklist");
614616
do {
@@ -619,14 +621,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
619621
BB->dump();
620622
});
621623

622-
Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC,
623-
OutlinedFunctionID);
624+
Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI, TTI,
625+
ORE, AC, OutlinedFunctionID);
624626
if (Outlined) {
625627
++OutlinedFunctionID;
626628
Changed = true;
627629
}
628630
} while (!Region.empty());
629-
}
631+
} while (!OutliningWorklist.empty());
630632

631633
return Changed;
632634
}

‎llvm/lib/Transforms/IPO/LoopExtractor.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -141,10 +141,12 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
141141
if (NumLoops == 0) return Changed;
142142
--NumLoops;
143143
AssumptionCache *AC = nullptr;
144+
Function &Func = *L->getHeader()->getParent();
144145
if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
145-
AC = ACT->lookupAssumptionCache(*L->getHeader()->getParent());
146+
AC = ACT->lookupAssumptionCache(Func);
147+
CodeExtractorAnalysisCache CEAC(Func);
146148
CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC);
147-
if (Extractor.extractCodeRegion() != nullptr) {
149+
if (Extractor.extractCodeRegion(CEAC) != nullptr) {
148150
Changed = true;
149151
// After extraction, the loop is replaced by a function call, so
150152
// we shouldn't try to run any more loop passes on it.

‎llvm/lib/Transforms/IPO/PartialInlining.cpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,9 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
11221122
BranchProbabilityInfo BPI(*ClonedFunc, LI);
11231123
ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
11241124

1125+
// Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
1126+
CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1127+
11251128
SetVector<Value *> Inputs, Outputs, Sinks;
11261129
for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
11271130
ClonedOMRI->ORI) {
@@ -1148,7 +1151,7 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
11481151
if (Outputs.size() > 0 && !ForceLiveExit)
11491152
continue;
11501153

1151-
Function *OutlinedFunc = CE.extractCodeRegion();
1154+
Function *OutlinedFunc = CE.extractCodeRegion(CEAC);
11521155

11531156
if (OutlinedFunc) {
11541157
CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
@@ -1210,11 +1213,12 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
12101213
}
12111214

12121215
// Extract the body of the if.
1216+
CodeExtractorAnalysisCache CEAC(*ClonedFunc);
12131217
Function *OutlinedFunc =
12141218
CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
12151219
ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
12161220
/* AllowVarargs */ true)
1217-
.extractCodeRegion();
1221+
.extractCodeRegion(CEAC);
12181222

12191223
if (OutlinedFunc) {
12201224
BasicBlock *OutliningCallBB =

0 commit comments

Comments
 (0)
Please sign in to comment.