diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -82,6 +82,7 @@ STATISTIC(NumFastOther, "Number of other instrs removed"); STATISTIC(NumCompletePartials, "Number of stores dead by later partials"); STATISTIC(NumModifiedStores, "Number of stores modified"); +STATISTIC(NumDomMemDefChecks, "Number of iterations in getDomMemoryDef"); DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa", "Controls which MemoryDefs are eliminated."); @@ -1466,6 +1467,18 @@ Function *ExitUseFn; + struct CheckCache { + SmallPtrSet KnownNoReads; + SmallPtrSet KnownReads; + + bool isKnownNoRead(MemoryAccess *A) const { + return KnownNoReads.find(A) != KnownNoReads.end(); + } + bool isKnownRead(MemoryAccess *A) const { + return KnownReads.find(A) != KnownReads.end(); + } + }; + DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI) : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {} @@ -1677,11 +1690,10 @@ // read access in between or return None otherwise. The returned value may not // (completely) overwrite \p DefLoc. Currently we bail out when we encounter // an aliasing MemoryUse (read). - Optional getDomMemoryDef(MemoryDef *KillingDef, - MemoryAccess *Current, - MemoryLocation DefLoc, - bool DefVisibleToCaller, - int &ScanLimit) const { + Optional + getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current, + MemoryLocation DefLoc, bool DefVisibleToCaller, + CheckCache &Cache, int &ScanLimit) const { MemoryAccess *DomAccess; bool StepAgain; LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current @@ -1723,16 +1735,32 @@ }; PushMemUses(DomAccess); + // Optimistically collect all accesses we for reads. If we do not find any + // read clobbers, add them to the cache. + SmallPtrSet KnownNoReads; // Check if DomDef may be read. for (unsigned I = 0; I < WorkList.size(); I++) { MemoryAccess *UseAccess = WorkList[I]; - + NumDomMemDefChecks++; LLVM_DEBUG(dbgs() << " Checking use " << *UseAccess); if (--ScanLimit == 0) { LLVM_DEBUG(dbgs() << " ... hit scan limit\n"); return None; } + // Check if we already visited this access. + if (Cache.isKnownNoRead(UseAccess)) { + LLVM_DEBUG(dbgs() << " ... skip, discovered that " << *UseAccess + << " is safe earlier.\n"); + continue; + } + if (Cache.isKnownRead(UseAccess)) { + LLVM_DEBUG(dbgs() << " ... bail out, discovered that " << *UseAccess + << " is has a read-clobber earlier.\n"); + return None; + } + KnownNoReads.insert(UseAccess); + if (isa(UseAccess)) { PushMemUses(UseAccess); continue; @@ -1755,6 +1783,8 @@ // original MD. Stop walk. if (isReadClobber(DefLoc, UseInst)) { LLVM_DEBUG(dbgs() << " ... found read clobber\n"); + Cache.KnownReads.insert(UseAccess); + Cache.KnownReads.insert(Current); return None; } @@ -1792,6 +1822,7 @@ } } + Cache.KnownNoReads.insert(KnownNoReads.begin(), KnownNoReads.end()); // No aliasing MemoryUses of DomAccess found, DomAccess is potentially dead. return {DomAccess}; } @@ -1955,6 +1986,7 @@ SetVector ToCheck; ToCheck.insert(KillingDef->getDefiningAccess()); + DSEState::CheckCache Cache; // Check if MemoryAccesses in the worklist are killed by KillingDef. for (unsigned I = 0; I < ToCheck.size(); I++) { Current = ToCheck[I]; @@ -1962,7 +1994,7 @@ continue; Optional Next = State.getDomMemoryDef( - KillingDef, Current, SILoc, DefVisibleToCaller, ScanLimit); + KillingDef, Current, SILoc, DefVisibleToCaller, Cache, ScanLimit); if (!Next) { LLVM_DEBUG(dbgs() << " finished walk\n"); diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/combined-partial-overwrites.ll @@ -252,6 +252,7 @@ ; CHECK-NEXT: [[BPTR:%.*]] = bitcast i32* [[PTR:%.*]] to i8* ; CHECK-NEXT: [[BPTR1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 1 ; CHECK-NEXT: [[BPTR2:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 2 +; CHECK-NEXT: [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3 ; CHECK-NEXT: [[WPTR:%.*]] = bitcast i8* [[BPTR]] to i16* ; CHECK-NEXT: [[WPTR1:%.*]] = bitcast i8* [[BPTR1]] to i16* ; CHECK-NEXT: [[WPTR2:%.*]] = bitcast i8* [[BPTR2]] to i16*