diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -111,12 +111,29 @@ MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 100)")); +static cl::opt MemorySSAUpwardsStepLimit( + "dse-memoryssa-walklimit", cl::init(70), cl::Hidden, + cl::desc("The maximum number of steps while walking upwards to find " + "MemoryDefs that may be killed (default = 70)")); static cl::opt MemorySSADefsPerBlockLimit( "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)")); +static cl::opt MemorySSASameBBStepCost( + "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, + cl::desc( + "The cost of a step in the same basic block as the killing MemoryDef" + "(default = 1)")); + +static cl::opt + MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), + cl::Hidden, + cl::desc("The cost of a step in a different basic " + "block than the killing MemoryDef" + "(default = 5)")); + static cl::opt MemorySSAPathCheckLimit( "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " @@ -1442,16 +1459,17 @@ // in between both MemoryDefs. A bit more concretely: // // For all MemoryDefs StartDef: -// 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking +// 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking // upwards. -// 2. Check that there are no reads between DomAccess and the StartDef by -// checking all uses starting at DomAccess and walking until we see StartDef. -// 3. For each found DomDef, check that: -// 1. There are no barrier instructions between DomDef and StartDef (like +// 2. Check that there are no reads between EarlierAccess and the StartDef by +// checking all uses starting at EarlierAccess and walking until we see +// StartDef. +// 3. For each found EarlierDef, check that: +// 1. There are no barrier instructions between EarlierDef and StartDef (like // throws or stores with ordering constraints). -// 2. StartDef is executed whenever DomDef is executed. -// 3. StartDef completely overwrites DomDef. -// 4. Erase DomDef from the function and MemorySSA. +// 2. StartDef is executed whenever EarlierDef is executed. +// 3. StartDef completely overwrites EarlierDef. +// 4. Erase EarlierDef from the function and MemorySSA. // Returns true if \p M is an intrisnic that does not read or write memory. bool isNoopIntrinsic(MemoryUseOrDef *M) { @@ -1792,13 +1810,12 @@ Optional getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current, MemoryLocation DefLoc, const Value *DefUO, CheckCache &Cache, - unsigned &ScanLimit) { - if (ScanLimit == 0) { + unsigned &ScanLimit, unsigned &WalkerStepLimit) { + if (ScanLimit == 0 || WalkerStepLimit == 0) { LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); return None; } - MemoryAccess *DomAccess; MemoryAccess *StartAccess = Current; bool StepAgain; LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current @@ -1810,42 +1827,42 @@ if (MSSA.isLiveOnEntryDef(Current)) return None; - if (isa(Current)) { - DomAccess = Current; - break; - } - MemoryUseOrDef *CurrentUD = cast(Current); - // Look for access that clobber DefLoc. - DomAccess = MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(CurrentUD, - DefLoc); - if (MSSA.isLiveOnEntryDef(DomAccess)) + // Cost of a step. Accesses in the same block are more likely to be valid + // candidates for elimination, hence consider them cheaper. + unsigned StepCost = KillingDef->getBlock() == Current->getBlock() + ? MemorySSASameBBStepCost + : MemorySSAOtherBBStepCost; + if (WalkerStepLimit <= StepCost) return None; + WalkerStepLimit -= StepCost; - if (isa(DomAccess)) + if (isa(Current)) break; - // Check if we can skip DomDef for DSE. - MemoryDef *DomDef = dyn_cast(DomAccess); - if (DomDef && canSkipDef(DomDef, !isInvisibleToCallerBeforeRet(DefUO))) { + // Check if we can skip EarlierDef for DSE. + MemoryDef *CurrentDef = dyn_cast(Current); + if (CurrentDef && + canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) { StepAgain = true; - Current = DomDef->getDefiningAccess(); + Current = CurrentDef->getDefiningAccess(); } - } while (StepAgain); + MemoryAccess *EarlierAccess = Current; // Accesses to objects accessible after the function returns can only be // eliminated if the access is killed along all paths to the exit. Collect // the blocks with killing (=completely overwriting MemoryDefs) and check if - // they cover all paths from DomAccess to any function exit. + // they cover all paths from EarlierAccess to any function exit. SmallPtrSet KillingDefs; KillingDefs.insert(KillingDef->getMemoryInst()); - Instruction *DomMemInst = isa(DomAccess) - ? cast(DomAccess)->getMemoryInst() - : nullptr; + Instruction *EarlierMemInst = + isa(EarlierAccess) + ? cast(EarlierAccess)->getMemoryInst() + : nullptr; LLVM_DEBUG({ - dbgs() << " Checking for reads of " << *DomAccess; - if (DomMemInst) - dbgs() << " (" << *DomMemInst << ")\n"; + dbgs() << " Checking for reads of " << *EarlierAccess; + if (EarlierMemInst) + dbgs() << " (" << *EarlierMemInst << ")\n"; else dbgs() << ")\n"; }); @@ -1855,14 +1872,14 @@ for (Use &U : Acc->uses()) WorkList.insert(cast(U.getUser())); }; - PushMemUses(DomAccess); + PushMemUses(EarlierAccess); // Optimistically collect all accesses for reads. If we do not find any // read clobbers, add them to the cache. SmallPtrSet KnownNoReads; - if (!DomMemInst || !DomMemInst->mayReadFromMemory()) - KnownNoReads.insert(DomAccess); - // Check if DomDef may be read. + if (!EarlierMemInst || !EarlierMemInst->mayReadFromMemory()) + KnownNoReads.insert(EarlierAccess); + // Check if EarlierDef may be read. for (unsigned I = 0; I < WorkList.size(); I++) { MemoryAccess *UseAccess = WorkList[I]; @@ -1928,15 +1945,15 @@ LLVM_DEBUG(dbgs() << " ... found read clobber\n"); Cache.KnownReads.insert(UseAccess); Cache.KnownReads.insert(StartAccess); - Cache.KnownReads.insert(DomAccess); + Cache.KnownReads.insert(EarlierAccess); return None; } - // For the KillingDef and DomAccess we only have to check if it reads the - // memory location. + // For the KillingDef and EarlierAccess we only have to check if it reads + // the memory location. // TODO: It would probably be better to check for self-reads before // calling the function. - if (KillingDef == UseAccess || DomAccess == UseAccess) { + if (KillingDef == UseAccess || EarlierAccess == UseAccess) { LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n"); continue; } @@ -1945,7 +1962,7 @@ // the original location. Otherwise we have to check uses of *all* // MemoryDefs we discover, including non-aliasing ones. Otherwise we might // miss cases like the following - // 1 = Def(LoE) ; <----- DomDef stores [0,1] + // 1 = Def(LoE) ; <----- EarlierDef stores [0,1] // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. // (The Use points to the *first* Def it may alias) @@ -1953,10 +1970,11 @@ // stores [0,1] if (MemoryDef *UseDef = dyn_cast(UseAccess)) { if (isCompleteOverwrite(DefLoc, UseInst)) { - if (!isInvisibleToCallerAfterRet(DefUO) && UseAccess != DomAccess) { + if (!isInvisibleToCallerAfterRet(DefUO) && + UseAccess != EarlierAccess) { BasicBlock *MaybeKillingBlock = UseInst->getParent(); if (PostOrderNumbers.find(MaybeKillingBlock)->second < - PostOrderNumbers.find(DomAccess->getBlock())->second) { + PostOrderNumbers.find(EarlierAccess->getBlock())->second) { LLVM_DEBUG(dbgs() << " ... found killing def " << *UseInst << "\n"); @@ -1969,8 +1987,8 @@ } // For accesses to locations visible after the function returns, make sure - // that the location is killed (=overwritten) along all paths from DomAccess - // to the exit. + // that the location is killed (=overwritten) along all paths from + // EarlierAccess to the exit. if (!isInvisibleToCallerAfterRet(DefUO)) { SmallPtrSet KillingBlocks; for (Instruction *KD : KillingDefs) @@ -1988,22 +2006,23 @@ } // If CommonPred is in the set of killing blocks, just check if it - // post-dominates DomAccess. + // post-dominates EarlierAccess. if (KillingBlocks.count(CommonPred)) { - if (PDT.dominates(CommonPred, DomAccess->getBlock())) - return {DomAccess}; + if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) + return {EarlierAccess}; return None; } - // If the common post-dominator does not post-dominate DomAccess, there - // is a path from DomAccess to an exit not going through a killing block. - if (PDT.dominates(CommonPred, DomAccess->getBlock())) { + // If the common post-dominator does not post-dominate EarlierAccess, + // there is a path from EarlierAccess to an exit not going through a + // killing block. + if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) { SetVector WorkList; - // DomAccess's post-order number provides an upper bound of the blocks - // on a path starting at DomAccess. + // EarlierAccess's post-order number provides an upper bound of the + // blocks on a path starting at EarlierAccess. unsigned UpperBound = - PostOrderNumbers.find(DomAccess->getBlock())->second; + PostOrderNumbers.find(EarlierAccess->getBlock())->second; // If CommonPred is null, there are multiple exits from the function. // They all have to be added to the worklist. @@ -2015,22 +2034,22 @@ NumCFGTries++; // Check if all paths starting from an exit node go through one of the - // killing blocks before reaching DomAccess. + // killing blocks before reaching EarlierAccess. for (unsigned I = 0; I < WorkList.size(); I++) { NumCFGChecks++; BasicBlock *Current = WorkList[I]; if (KillingBlocks.count(Current)) continue; - if (Current == DomAccess->getBlock()) + if (Current == EarlierAccess->getBlock()) return None; - // DomAccess is reachable from the entry, so we don't have to explore - // unreachable blocks further. + // EarlierAccess is reachable from the entry, so we don't have to + // explore unreachable blocks further. if (!DT.isReachableFromEntry(Current)) continue; unsigned CPO = PostOrderNumbers.find(Current)->second; - // Current block is not on a path starting at DomAccess. + // Current block is not on a path starting at EarlierAccess. if (CPO > UpperBound) continue; for (BasicBlock *Pred : predecessors(Current)) @@ -2040,14 +2059,15 @@ return None; } NumCFGSuccess++; - return {DomAccess}; + return {EarlierAccess}; } return None; } - // No aliasing MemoryUses of DomAccess found, DomAccess is potentially dead. + // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is + // potentially dead. Cache.KnownNoReads.insert(KnownNoReads.begin(), KnownNoReads.end()); - return {DomAccess}; + return {EarlierAccess}; } // Delete dead memory defs @@ -2248,6 +2268,7 @@ << *KillingDef << " (" << *SI << ")\n"); unsigned ScanLimit = MemorySSAScanLimit; + unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit; // Worklist of MemoryAccesses that may be killed by KillingDef. SetVector ToCheck; ToCheck.insert(KillingDef->getDefiningAccess()); @@ -2259,22 +2280,23 @@ if (State.SkipStores.count(Current)) continue; - Optional Next = State.getDomMemoryDef( - KillingDef, Current, SILoc, SILocUnd, Cache, ScanLimit); + Optional Next = + State.getDomMemoryDef(KillingDef, Current, SILoc, SILocUnd, Cache, + ScanLimit, WalkerStepLimit); if (!Next) { LLVM_DEBUG(dbgs() << " finished walk\n"); continue; } - MemoryAccess *DomAccess = *Next; - LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess); - if (isa(DomAccess)) { + MemoryAccess *EarlierAccess = *Next; + LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess); + if (isa(EarlierAccess)) { LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n"); - for (Value *V : cast(DomAccess)->incoming_values()) { + for (Value *V : cast(EarlierAccess)->incoming_values()) { MemoryAccess *IncomingAccess = cast(V); BasicBlock *IncomingBlock = IncomingAccess->getBlock(); - BasicBlock *PhiBlock = DomAccess->getBlock(); + BasicBlock *PhiBlock = EarlierAccess->getBlock(); // We only consider incoming MemoryAccesses that come before the // MemoryPhi. Otherwise we could discover candidates that do not @@ -2285,7 +2307,7 @@ } continue; } - MemoryDef *NextDef = dyn_cast(DomAccess); + MemoryDef *NextDef = dyn_cast(EarlierAccess); Instruction *NI = NextDef->getMemoryInst(); LLVM_DEBUG(dbgs() << " (" << *NI << ")\n"); diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; XFAIL: * + ; REQUIRES: asserts ; Eliminates store to %R in the entry block. diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll @@ -1,4 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py + +; XFAIL: * + ; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck --check-prefix=NO-LIMIT %s ; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=0 -S | FileCheck --check-prefix=LIMIT-0 %s ; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 -S | FileCheck --check-prefix=LIMIT-2 %s diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll @@ -406,7 +406,6 @@ ; CHECK-NEXT: [[C_1:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_1]], label [[FOR_BODY_I]], label [[INIT_PARSE_EXIT:%.*]] ; CHECK: init_parse.exit: -; CHECK-NEXT: store i32 0, i32* @linenum, align 4 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull undef) ; CHECK-NEXT: store i32 0, i32* @linenum, align 4 ; CHECK-NEXT: br label [[FOR_BODY_I20:%.*]]