diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -439,23 +439,28 @@ /// operations. If isLoad is false, this routine ignores may-aliases /// with reads from read-only locations. If possible, pass the query /// instruction as well; this function may take advantage of the metadata - /// annotated to the query instruction to refine the result. \p Limit + /// annotated to the query instruction to refine the result. \p LocalLimit /// can be used to set the maximum number of instructions that will be - /// examined to find the pointer dependency. On return, it will be set to - /// the number of instructions left to examine. If a null pointer is passed - /// in, the limit will default to the value of -memdep-block-scan-limit. + /// examined in a single block to find the pointer dependency. On return, + /// it will be set to the number of instructions left to examine. + /// If a null pointer is passed in, the limit will default to the value + /// of -memdep-block-scan-limit. + /// \p GlobalLimit can be used to set the total maximum number of instructions + /// that will be examined to find the pointer dependency. /// /// Note that this is an uncached query, and thus may be inefficient. MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst = nullptr, - unsigned *Limit = nullptr); + unsigned *LocalLimit = nullptr, + unsigned *GlobalLimit = nullptr); MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, - Instruction *QueryInst, unsigned *Limit); + Instruction *QueryInst, unsigned *LocalLimit, + unsigned *GlobalLimit); /// This analysis looks for other loads and stores with invariant.group /// metadata and the same pointer operand. Returns Unknown if it does not @@ -472,18 +477,17 @@ MemDepResult getCallDependencyFrom(CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt, BasicBlock *BB); - bool getNonLocalPointerDepFromBB(Instruction *QueryInst, - const PHITransAddr &Pointer, - const MemoryLocation &Loc, bool isLoad, - BasicBlock *BB, - SmallVectorImpl &Result, - DenseMap &Visited, - bool SkipFirstBlock = false, - bool IsIncomplete = false); + bool getNonLocalPointerDepFromBB( + Instruction *QueryInst, const PHITransAddr &Pointer, + const MemoryLocation &Loc, bool isLoad, BasicBlock *BB, + SmallVectorImpl &Result, + DenseMap &Visited, unsigned *GlobalLimit, + bool SkipFirstBlock = false, bool IsIncomplete = false); MemDepResult GetNonLocalInfoForBlock(Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, - unsigned NumSortedEntries); + unsigned NumSortedEntries, + unsigned *GlobalLimit); void RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P); diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -76,6 +77,14 @@ STATISTIC(NumCacheCompleteNonLocalPtr, "Number of block queries that were completely cached"); +STATISTIC(NumBlockInstructionsScannedMax, + "Maxmimal number of per-block instructions scanned in non-local " + "memory dependency analysis"); +STATISTIC(NumBlocksScannedMax, "Maxmimal number of blocks scanned in non-local " + "memory dependency analysis"); +STATISTIC(NumInstructionsScannedMax, "Maxmimal number of instructions scanned " + "in non-local memory dependency analysis"); + // Limit for the number of instructions to scan in a block. static cl::opt BlockScanLimit( @@ -88,8 +97,19 @@ cl::desc("The number of blocks to scan during memory " "dependency analysis (default = 1000)")); -// Limit on the number of memdep results to process. -static const unsigned int NumResultsLimit = 100; +// In each of BlockNumberLimit block we are willing to scan up to BlockScanLimit +// instructions, but the total count of instructions scanned in all blocks is +// at most TotalInstructionCountLimit. +// This is related to the NumInstructionsScannedMax statistic. +static cl::opt TotalInstructionCountLimit( + "memdep-total-instruction-count-limit", cl::Hidden, cl::init(100000), + cl::desc("The number of instructions we are allowed to scan during memory " + "dependency analysis (default = 100000)")); + +static cl::opt NumResultsLimit( + "memdep-num-results-limit", cl::Hidden, cl::init(100), + cl::desc( + "Limit on the number of memdep results to process (default = 100)")); /// This is a helper function that removes Val from 'Inst's set in ReverseMap. /// @@ -183,7 +203,7 @@ MemDepResult MemoryDependenceResults::getCallDependencyFrom( CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt, BasicBlock *BB) { - unsigned Limit = getDefaultBlockScanLimit(); + unsigned LocalLimit = getDefaultBlockScanLimit(); // Walk backwards through the block, looking for dependencies. while (ScanIt != BB->begin()) { @@ -194,8 +214,8 @@ // Limit the amount of scanning we do so we don't end up with quadratic // running time on extreme testcases. - --Limit; - if (!Limit) + --LocalLimit; + if (!LocalLimit) return MemDepResult::getUnknown(); // If this inst is a memory op, get the pointer it accessed @@ -249,7 +269,8 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, - BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { + BasicBlock *BB, Instruction *QueryInst, unsigned *LocalLimit, + unsigned *GlobalLimit) { MemDepResult InvariantGroupDependency = MemDepResult::getUnknown(); if (QueryInst != nullptr) { if (auto *LI = dyn_cast(QueryInst)) { @@ -260,7 +281,7 @@ } } MemDepResult SimpleDep = getSimplePointerDependencyFrom( - MemLoc, isLoad, ScanIt, BB, QueryInst, Limit); + MemLoc, isLoad, ScanIt, BB, QueryInst, LocalLimit, GlobalLimit); if (SimpleDep.isDef()) return SimpleDep; // Non-local invariant group dependency indicates there is non local Def @@ -361,12 +382,20 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, - BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { + BasicBlock *BB, Instruction *QueryInst, unsigned *LocalLimit, + unsigned *GlobalLimit) { bool isInvariantLoad = false; - unsigned DefaultLimit = getDefaultBlockScanLimit(); - if (!Limit) - Limit = &DefaultLimit; + unsigned DefaultLocalLimit = getDefaultBlockScanLimit(); + unsigned DefaultGlobalLimit = getDefaultBlockScanLimit(); + if (!LocalLimit) + LocalLimit = &DefaultLocalLimit; + if (!GlobalLimit) + GlobalLimit = &DefaultGlobalLimit; + + auto _ = make_scope_exit([&, Orig = *LocalLimit]() { + NumBlockInstructionsScannedMax.updateMax(Orig - *LocalLimit); + }); // We must be careful with atomic accesses, as they may allow another thread // to touch this location, clobbering it. We are conservative: if the @@ -435,9 +464,10 @@ // Limit the amount of scanning we do so we don't end up with quadratic // running time on extreme testcases. - --*Limit; - if (!*Limit) + if (!*LocalLimit || !*GlobalLimit) return MemDepResult::getUnknown(); + --*LocalLimit; + --*GlobalLimit; if (IntrinsicInst *II = dyn_cast(Inst)) { // If we reach a lifetime begin or end marker, then the query ends here @@ -872,8 +902,14 @@ // a block with multiple different pointers. This can happen during PHI // translation. DenseMap Visited; - if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB, - Result, Visited, true)) + unsigned GlobalLimit = TotalInstructionCountLimit; + auto _ = make_scope_exit([&, Orig = GlobalLimit]() { + NumInstructionsScannedMax.updateMax(Orig - GlobalLimit); + }); + bool Success = + getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB, + Result, Visited, &GlobalLimit, true); + if (Success) return; Result.clear(); Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(), @@ -887,7 +923,8 @@ /// If we do a lookup, add the result to the cache. MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock( Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad, - BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) { + BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries, + unsigned *GlobalLimit) { bool isInvariantLoad = false; @@ -937,8 +974,8 @@ } // Scan the block for the dependency. - MemDepResult Dep = - getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst); + MemDepResult Dep = getPointerDependencyFrom( + Loc, isLoad, ScanPos, BB, QueryInst, /*LocalLimit=*/nullptr, GlobalLimit); // Don't cache results for invariant load. if (isInvariantLoad) @@ -1020,8 +1057,8 @@ Instruction *QueryInst, const PHITransAddr &Pointer, const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB, SmallVectorImpl &Result, - DenseMap &Visited, bool SkipFirstBlock, - bool IsIncomplete) { + DenseMap &Visited, unsigned *GlobalLimit, + bool SkipFirstBlock, bool IsIncomplete) { // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); @@ -1079,7 +1116,8 @@ // the query using the greater size. return getNonLocalPointerDepFromBB( QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad, - StartBB, Result, Visited, SkipFirstBlock, IsIncomplete); + StartBB, Result, Visited, GlobalLimit, SkipFirstBlock, + IsIncomplete); } } @@ -1102,7 +1140,7 @@ if (Loc.AATags) return getNonLocalPointerDepFromBB( QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result, - Visited, SkipFirstBlock, IsIncomplete); + Visited, GlobalLimit, SkipFirstBlock, IsIncomplete); } } @@ -1178,8 +1216,11 @@ // won't get any reuse from currently inserted values, because we don't // revisit blocks after we insert info for them. unsigned NumSortedEntries = Cache->size(); - unsigned WorklistEntries = BlockNumberLimit; bool GotWorklistLimit = false; + unsigned BlockBudget = BlockNumberLimit; + auto _ = make_scope_exit([&, Orig = BlockBudget]() { + NumBlocksScannedMax.updateMax(Orig - BlockBudget); + }); LLVM_DEBUG(AssertSorted(*Cache)); while (!Worklist.empty()) { @@ -1212,8 +1253,8 @@ // Get the dependency info for Pointer in BB. If we have cached // information, we will use it, otherwise we compute it. LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries)); - MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB, - Cache, NumSortedEntries); + MemDepResult Dep = GetNonLocalInfoForBlock( + QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, GlobalLimit); // If we got a Def or Clobber, add this to the list of results. if (!Dep.isNonLocal()) { @@ -1252,7 +1293,7 @@ goto PredTranslationFailure; } } - if (NewBlocks.size() > WorklistEntries) { + if (NewBlocks.size() > BlockBudget) { // Make sure to clean up the Visited map before continuing on to // PredTranslationFailure. for (unsigned i = 0; i < NewBlocks.size(); i++) @@ -1260,7 +1301,7 @@ GotWorklistLimit = true; goto PredTranslationFailure; } - WorklistEntries -= NewBlocks.size(); + BlockBudget -= NewBlocks.size(); Worklist.append(NewBlocks.begin(), NewBlocks.end()); continue; } @@ -1349,8 +1390,8 @@ // assume it is unknown, but this also does not block PRE of the load. if (!CanTranslate || !getNonLocalPointerDepFromBB(QueryInst, PredPointer, - Loc.getWithNewPtr(PredPtrVal), isLoad, - Pred, Result, Visited)) { + Loc.getWithNewPtr(PredPtrVal), isLoad, + Pred, Result, Visited, GlobalLimit)) { // Add the entry to the Result list. NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal); Result.push_back(Entry);