Skip to content

Commit 3db1764

Browse files
committedAug 26, 2016
limit the number of instructions per block examined by dead store elimination
Summary: Dead store elimination gets very expensive when large numbers of instructions need to be analyzed. This patch limits the number of instructions analyzed per store to the value of the memdep-block-scan-limit parameter (which defaults to 100). This resulted in no observed difference in performance of the generated code, and no change in the statistics for the dead store elimination pass, but improved compilation time on some files by more than an order of magnitude. Reviewers: dexonsmith, bruno, george.burgess.iv, dberlin, reames, davidxl Subscribers: davide, chandlerc, dberlin, davidxl, eraman, tejohnson, mbodart, llvm-commits Differential Revision: https://reviews.llvm.org/D15537 llvm-svn: 279833
1 parent d1e020f commit 3db1764

File tree

3 files changed

+41
-11
lines changed

3 files changed

+41
-11
lines changed
 

‎llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,11 @@ class MemoryDependenceResults {
350350
DominatorTree &DT)
351351
: AA(AA), AC(AC), TLI(TLI), DT(DT) {}
352352

353+
/// Some methods limit the number of instructions they will examine.
354+
/// The return value of this method is the default limit that will be
355+
/// used if no limit is explicitly passed in.
356+
unsigned getDefaultBlockScanLimit() const;
357+
353358
/// Returns the instruction on which a memory operation depends.
354359
///
355360
/// See the class comment for more details. It is illegal to call this on
@@ -409,19 +414,25 @@ class MemoryDependenceResults {
409414
/// operations. If isLoad is false, this routine ignores may-aliases
410415
/// with reads from read-only locations. If possible, pass the query
411416
/// instruction as well; this function may take advantage of the metadata
412-
/// annotated to the query instruction to refine the result.
417+
/// annotated to the query instruction to refine the result. \p Limit
418+
/// can be used to set the maximum number of instructions that will be
419+
/// examined to find the pointer dependency. On return, it will be set to
420+
/// the number of instructions left to examine. If a null pointer is passed
421+
/// in, the limit will default to the value of -memdep-block-scan-limit.
413422
///
414423
/// Note that this is an uncached query, and thus may be inefficient.
415424
MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad,
416425
BasicBlock::iterator ScanIt,
417426
BasicBlock *BB,
418-
Instruction *QueryInst = nullptr);
427+
Instruction *QueryInst = nullptr,
428+
unsigned *Limit = nullptr);
419429

420430
MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc,
421431
bool isLoad,
422432
BasicBlock::iterator ScanIt,
423433
BasicBlock *BB,
424-
Instruction *QueryInst);
434+
Instruction *QueryInst,
435+
unsigned *Limit = nullptr);
425436

426437
/// This analysis looks for other loads and stores with invariant.group
427438
/// metadata and the same pointer operand. Returns Unknown if it does not

‎llvm/lib/Analysis/MemoryDependenceAnalysis.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ static bool isVolatile(Instruction *Inst) {
341341

342342
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
343343
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
344-
BasicBlock *BB, Instruction *QueryInst) {
344+
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
345345

346346
if (QueryInst != nullptr) {
347347
if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
@@ -352,7 +352,8 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
352352
return invariantGroupDependency;
353353
}
354354
}
355-
return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst);
355+
return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
356+
Limit);
356357
}
357358

358359
MemDepResult
@@ -408,12 +409,18 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
408409

409410
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
410411
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
411-
BasicBlock *BB, Instruction *QueryInst) {
412+
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
413+
412414
const Value *MemLocBase = nullptr;
413415
int64_t MemLocOffset = 0;
414-
unsigned Limit = BlockScanLimit;
415416
bool isInvariantLoad = false;
416417

418+
if (!Limit) {
419+
unsigned DefaultLimit = BlockScanLimit;
420+
return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
421+
&DefaultLimit);
422+
}
423+
417424
// We must be careful with atomic accesses, as they may allow another thread
418425
// to touch this location, clobbering it. We are conservative: if the
419426
// QueryInst is not a simple (non-atomic) memory access, we automatically
@@ -487,8 +494,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
487494

488495
// Limit the amount of scanning we do so we don't end up with quadratic
489496
// running time on extreme testcases.
490-
--Limit;
491-
if (!Limit)
497+
--*Limit;
498+
if (!*Limit)
492499
return MemDepResult::getUnknown();
493500

494501
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -1712,6 +1719,10 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
17121719
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
17131720
}
17141721

1722+
unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1723+
return BlockScanLimit;
1724+
}
1725+
17151726
bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
17161727
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
17171728
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

‎llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,6 +1050,13 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
10501050
if (!Loc.Ptr)
10511051
continue;
10521052

1053+
// Loop until we find a store we can eliminate or a load that
1054+
// invalidates the analysis. Without an upper bound on the number of
1055+
// instructions examined, this analysis can become very time-consuming.
1056+
// However, the potential gain diminishes as we process more instructions
1057+
// without eliminating any of them. Therefore, we limit the number of
1058+
// instructions we look at.
1059+
auto Limit = MD->getDefaultBlockScanLimit();
10531060
while (InstDep.isDef() || InstDep.isClobber()) {
10541061
// Get the memory clobbered by the instruction we depend on. MemDep will
10551062
// skip any instructions that 'Loc' clearly doesn't interact with. If we
@@ -1138,8 +1145,9 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
11381145
if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
11391146
break;
11401147

1141-
InstDep = MD->getPointerDependencyFrom(Loc, false,
1142-
DepWrite->getIterator(), &BB);
1148+
InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false,
1149+
DepWrite->getIterator(), &BB,
1150+
/*QueryInst=*/ nullptr, &Limit);
11431151
}
11441152
}
11451153

0 commit comments

Comments
 (0)
Please sign in to comment.