Index: include/llvm/Analysis/MemoryDependenceAnalysis.h =================================================================== --- include/llvm/Analysis/MemoryDependenceAnalysis.h +++ include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -350,6 +350,11 @@ DominatorTree &DT) : AA(AA), AC(AC), TLI(TLI), DT(DT) {} + /// Some methods limit the number of instructions they will examine. + /// The return value of this method is the default limit that will be + /// used if no limit is explicitly passed in. + unsigned getDefaultBlockScanLimit() const; + /// Returns the instruction on which a memory operation depends. /// /// See the class comment for more details. It is illegal to call this on @@ -409,19 +414,25 @@ /// operations. If isLoad is false, this routine ignores may-aliases /// with reads from read-only locations. If possible, pass the query /// instruction as well; this function may take advantage of the metadata - /// annotated to the query instruction to refine the result. + /// annotated to the query instruction to refine the result. \p Limit + /// can be used to set the maximum number of instructions that will be + /// examined to find the pointer dependency. On return, it will be set to + /// the number of instructions left to examine. If a null pointer is passed + /// in, the limit will default to the value of -memdep-block-scan-limit. /// /// Note that this is an uncached query, and thus may be inefficient. MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, - Instruction *QueryInst = nullptr); + Instruction *QueryInst = nullptr, + unsigned *Limit = nullptr); MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, - Instruction *QueryInst); + Instruction *QueryInst, + unsigned *Limit = nullptr); /// This analysis looks for other loads and stores with invariant.group /// metadata and the same pointer operand. Returns Unknown if it does not Index: lib/Analysis/MemoryDependenceAnalysis.cpp =================================================================== --- lib/Analysis/MemoryDependenceAnalysis.cpp +++ lib/Analysis/MemoryDependenceAnalysis.cpp @@ -327,7 +327,7 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, - BasicBlock *BB, Instruction *QueryInst) { + BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { if (QueryInst != nullptr) { if (auto *LI = dyn_cast(QueryInst)) { @@ -338,7 +338,8 @@ return invariantGroupDependency; } } - return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst); + return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, + Limit); } MemDepResult @@ -394,13 +395,18 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, - BasicBlock *BB, Instruction *QueryInst) { + BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { const Value *MemLocBase = nullptr; int64_t MemLocOffset = 0; - unsigned Limit = BlockScanLimit; bool isInvariantLoad = false; + if (!Limit) { + unsigned DefaultLimit = BlockScanLimit; + return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, + &DefaultLimit); + } + // We must be careful with atomic accesses, as they may allow another thread // to touch this location, clobbering it. We are conservative: if the // QueryInst is not a simple (non-atomic) memory access, we automatically @@ -474,8 +480,8 @@ // Limit the amount of scanning we do so we don't end up with quadratic // running time on extreme testcases. - --Limit; - if (!Limit) + --*Limit; + if (!*Limit) return MemDepResult::getUnknown(); if (IntrinsicInst *II = dyn_cast(Inst)) { @@ -1698,6 +1704,10 @@ AU.addRequiredTransitive(); } +unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const { + return BlockScanLimit; +} + bool MemoryDependenceWrapperPass::runOnFunction(Function &F) { auto &AA = getAnalysis().getAAResults(); auto &AC = getAnalysis().getAssumptionCache(F); Index: lib/Transforms/Scalar/DeadStoreElimination.cpp =================================================================== --- lib/Transforms/Scalar/DeadStoreElimination.cpp +++ lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1051,6 +1051,13 @@ if (!Loc.Ptr) continue; + // Loop until we find a store we can eliminate or a load that + // invalidates the analysis. Without an upper bound on the number of + // instructions examined, this analysis can become very time-consuming. + // However, the potential gain diminishes as we process more instructions + // without eliminating any of them. Therefore, we limit the number of + // instructions we look at. + auto Limit = MD->getDefaultBlockScanLimit(); while (InstDep.isDef() || InstDep.isClobber()) { // Get the memory clobbered by the instruction we depend on. MemDep will // skip any instructions that 'Loc' clearly doesn't interact with. If we @@ -1139,8 +1146,9 @@ if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref) break; - InstDep = MD->getPointerDependencyFrom(Loc, false, - DepWrite->getIterator(), &BB); + InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false, + DepWrite->getIterator(), &BB, + /*QueryInst=*/ nullptr, &Limit); } }