diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -38,6 +38,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -325,6 +326,7 @@ OW_End, OW_PartialEarlierWithFullLater, OW_MaybePartial, + OW_None, OW_Unknown }; @@ -397,7 +399,8 @@ // the later store was larger than the earlier store. if (AAR == AliasResult::MustAlias) { // Make sure that the Later size is >= the Earlier size. - if (LaterSize >= EarlierSize) + if (EarlierOff >= LaterOff && LaterSize >= EarlierSize && + uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize) return OW_Complete; } @@ -429,10 +432,13 @@ // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. - EarlierOff = 0; - LaterOff = 0; - const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL); - const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL); + int64_t ExtraEarlierOff = 0; + int64_t ExtraLaterOff = 0; + const Value *BP1 = GetPointerBaseWithConstantOffset(P1, ExtraEarlierOff, DL); + const Value *BP2 = GetPointerBaseWithConstantOffset(P2, ExtraLaterOff, DL); + + EarlierOff += ExtraEarlierOff; + LaterOff += ExtraLaterOff; // If the base pointers still differ, we have two completely different stores. if (BP1 != BP2) @@ -471,7 +477,7 @@ // Can reach here only if accesses are known not to overlap. There is no // dedicated code to indicate no overlap so signal "unknown". - return OW_Unknown; + return OW_None; } /// Return 'OW_Complete' if a store to the 'Later' location completely @@ -954,6 +960,95 @@ return false; } +using PhiTransPtr = std::pair; +using ExMemoryAccess = std::pair; + +static PhiTransPtr phiTranslatePtr(const PhiTransPtr &Ptr, BasicBlock *FromBB, + BasicBlock *ToBB, const DataLayout &DL, + DominatorTree &DT, AssumptionCache *AC) { + PhiTransPtr ResPtr; + DenseMap Visited; + SmallVector, 16> WorkList; + + if (FromBB == ToBB) { + return Ptr; + } + + WorkList.push_back({FromBB, Ptr}); + + while (!WorkList.empty()) { + auto &CurrNode = WorkList.back(); + WorkList.pop_back(); + BasicBlock *CurrBB = CurrNode.first; + const PhiTransPtr &CurrPtr = CurrNode.second; + + for (pred_iterator PI = pred_begin(CurrBB), E = pred_end(CurrBB); PI != E; + ++PI) { + BasicBlock *PredBB = *PI; + int64_t Offset = 0; + Value *BasePtr = + GetPointerBaseWithConstantOffset(CurrPtr.first, Offset, DL); + Offset += CurrPtr.second; + PHITransAddr TransAddr{BasePtr, DL, AC}; + + // TODO: + if (!DT.dominates(ToBB, PredBB)) + continue; + + if (TransAddr.NeedsPHITranslationFromBlock(CurrBB) && + (!TransAddr.IsPotentiallyPHITranslatable() || + TransAddr.PHITranslateValue(CurrBB, PredBB, &DT, false))) + return PhiTransPtr{}; + + auto Inserted = Visited.try_emplace(PredBB, TransAddr.getAddr(), Offset); + auto &TransPtr = Inserted.first->second; + if (!Inserted.second) { + if (TransAddr.getAddr() != TransPtr.first || + Offset != TransPtr.second) + // We already visited this block before. If it was with a different + // address - bail out! + return PhiTransPtr{}; + continue; + } + + if (PredBB == ToBB) { + ResPtr = TransPtr; + continue; + } + + WorkList.push_back({PredBB, TransPtr}); + } + } + + assert(ResPtr.first.pointsToAliveValue() && + "PHI translation is expected to complete successfully"); + return ResPtr; +} + +static ExMemoryAccess +phiTransFromMemoryAccessTo(const ExMemoryAccess &FromAccess, + MemoryAccess *ToAccess, const DataLayout &DL, + DominatorTree &DT, AssumptionCache *AC) { + PhiTransPtr ResAddr; + + if (FromAccess.second.first.pointsToAliveValue()) { + + ResAddr = phiTranslatePtr(FromAccess.second, FromAccess.first->getBlock(), + ToAccess->getBlock(), DL, DT, AC); + } + + return std::make_pair(ToAccess, ResAddr); +} + +static ExMemoryAccess getExDefiningAccess(const ExMemoryAccess &CurrAccess, + const DataLayout &DL, DominatorTree &DT, + AssumptionCache *AC) { + assert(isa(CurrAccess.first) && "TODO"); + MemoryAccess *Next = + cast(CurrAccess.first)->getDefiningAccess(); + return phiTransFromMemoryAccessTo(CurrAccess, Next, DL, DT, AC); +} + struct DSEState { Function &F; AliasAnalysis &AA; @@ -972,6 +1067,7 @@ PostDominatorTree &PDT; const TargetLibraryInfo &TLI; const DataLayout &DL; + AssumptionCache *AC; // All MemoryDefs that potentially could kill other MemDefs. SmallVector MemDefs; @@ -995,14 +1091,14 @@ DenseMap IOLs; DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, - PostDominatorTree &PDT, const TargetLibraryInfo &TLI) + PostDominatorTree &PDT, const TargetLibraryInfo &TLI, AssumptionCache *AC) : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI), - DL(F.getParent()->getDataLayout()) {} + DL(F.getParent()->getDataLayout()), AC(AC) {} static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, - const TargetLibraryInfo &TLI) { - DSEState State(F, AA, MSSA, DT, PDT, TLI); + const TargetLibraryInfo &TLI, AssumptionCache *AC) { + DSEState State(F, AA, MSSA, DT, PDT, TLI, AC); // Collect blocks with throwing instructions not modeled in MemorySSA and // alloc-like objects. unsigned PO = 0; @@ -1118,7 +1214,8 @@ if (CB->onlyAccessesInaccessibleMemory()) return false; - int64_t InstWriteOffset, DepWriteOffset; + int64_t InstWriteOffset = 0; + int64_t DepWriteOffset = 0; if (auto CC = getLocForWriteEx(UseInst)) return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset, InstWriteOffset, BatchAA, &F) == OW_Complete; @@ -1131,7 +1228,8 @@ << *Def->getMemoryInst() << ") is at the end the function \n"); - auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst()); + Instruction *DefInst = Def->getMemoryInst(); + auto MaybeLoc = getLocForWriteEx(DefInst); if (!MaybeLoc) { LLVM_DEBUG(dbgs() << " ... could not get location for write.\n"); return false; @@ -1163,7 +1261,7 @@ // TODO: Checking for aliasing is expensive. Consider reducing the amount // of times this is called and/or caching it. Instruction *UseInst = cast(UseAccess)->getMemoryInst(); - if (isReadClobber(*MaybeLoc, UseInst)) { + if (isReadClobber(DefInst, *MaybeLoc, 0, UseInst)) { LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n"); return false; } @@ -1223,14 +1321,18 @@ const Value *LocUO = getUnderlyingObject(Loc.Ptr); return BatchAA.isMustAlias(TermLoc.Ptr, LocUO); } - int64_t InstWriteOffset, DepWriteOffset; + int64_t InstWriteOffset = 0; + int64_t DepWriteOffset = 0; return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DL, TLI, DepWriteOffset, InstWriteOffset, BatchAA, &F) == OW_Complete; } // Returns true if \p Use may read from \p DefLoc. - bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) { + bool isReadClobber(Instruction *DefInst, const MemoryLocation &DefLoc, + int64_t DefOffset, Instruction *UseInst, + Optional UseLoc = None, + int64_t UseOffset = 0) { if (isNoopIntrinsic(UseInst)) return false; @@ -1246,11 +1348,25 @@ if (CB->onlyAccessesInaccessibleMemory()) return false; + if (!UseLoc) { + UseLoc = getLocForWriteEx(UseInst); + if (!UseLoc) + UseLoc = MemoryLocation::getOrNone(UseInst); + } + + if (UseLoc && + isOverwrite(DefInst, UseInst, DefLoc, *UseLoc, DL, TLI, UseOffset, + DefOffset, BatchAA, &F) == OW_None) + return false; + // NOTE: For calls, the number of stores removed could be slightly improved // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to // be expensive compared to the benefits in practice. For now, avoid more // expensive analysis to limit compile-time. - return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc)); + if (DefOffset == 0 && UseOffset == 0) + return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc)); + + return true; } /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible @@ -1290,8 +1406,8 @@ // such MemoryDef, return None. The returned value may not (completely) // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing // MemoryUse (read). - Optional - getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess, + Optional + getDomMemoryDef(MemoryDef *KillingDef, const ExMemoryAccess &ExStartAccess, const MemoryLocation &DefLoc, const Value *DefUO, unsigned &ScanLimit, unsigned &WalkerStepLimit, bool IsMemTerm, unsigned &PartialLimit) { @@ -1300,13 +1416,24 @@ return None; } - MemoryAccess *Current = StartAccess; + MemoryAccess * const StartAccess = ExStartAccess.first; Instruction *KillingI = KillingDef->getMemoryInst(); LLVM_DEBUG(dbgs() << " trying to get dominating access\n"); + ExMemoryAccess CurrentAccess = ExStartAccess; // Find the next clobbering Mod access for DefLoc, starting at StartAccess. Optional CurrentLoc; - for (;; Current = cast(Current)->getDefiningAccess()) { + for (;; CurrentAccess = getExDefiningAccess(CurrentAccess, DL, DT, AC)) { + MemoryAccess *Current = CurrentAccess.first; + int64_t DefOffset = 0; + const Value *ResDefPtr = DefLoc.Ptr; + + if (CurrentAccess.second.first.pointsToAliveValue()) { + ResDefPtr = CurrentAccess.second.first; + DefOffset = CurrentAccess.second.second; + } + + MemoryLocation ResDefLoc(ResDefPtr, DefLoc.Size, DefLoc.AATags); LLVM_DEBUG({ dbgs() << " visiting " << *Current; if (!MSSA.isLiveOnEntryDef(Current) && isa(Current)) @@ -1336,7 +1463,7 @@ // caller is responsible for traversing them. if (isa(Current)) { LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n"); - return Current; + return CurrentAccess; } // Below, check if CurrentDef is a valid candidate to be eliminated by @@ -1362,18 +1489,37 @@ return None; } + // If Current does not have an analyzable write location, skip it + int64_t CurrOffset = 0; + CurrentLoc = getLocForWriteEx(CurrentI); + if (!CurrentLoc) { + continue; + } // If Current is known to be on path that reads DefLoc or is a read // clobber, bail out, as the path is not profitable. We skip this check // for intrinsic calls, because the code knows how to handle memcpy // intrinsics. - if (!isa(CurrentI) && isReadClobber(DefLoc, CurrentI)) + if (!isa(CurrentI) && isReadClobber(KillingI, ResDefLoc, DefOffset, CurrentI, CurrentLoc, CurrOffset)) return None; // Quick check if there are direct uses that are read-clobbers. - if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) { - if (auto *UseOrDef = dyn_cast(U.getUser())) - return !MSSA.dominates(StartAccess, UseOrDef) && - isReadClobber(DefLoc, UseOrDef->getMemoryInst()); + if (any_of(Current->uses(), [this, KillingI, CurrentI, &DefLoc, + &ResDefLoc, DefOffset, StartAccess](Use &U) { + if (auto *UseOrDef = dyn_cast(U.getUser())) { + auto *UseInst = UseOrDef->getMemoryInst(); + if (MSSA.dominates(StartAccess, UseOrDef)) + return false; + if (UseInst->getParent() == CurrentI->getParent()) { + if (!isReadClobber(KillingI, ResDefLoc, DefOffset, UseInst)) { + return false; + } + } else { + if (!isReadClobber(KillingI, DefLoc, 0, UseInst)) { + return false; + } + } + return true; + } return false; })) { LLVM_DEBUG(dbgs() << " ... found a read clobber\n"); @@ -1386,12 +1532,6 @@ continue; } - // If Current does not have an analyzable write location, skip it - CurrentLoc = getLocForWriteEx(CurrentI); - if (!CurrentLoc) { - continue; - } - // AliasAnalysis does not account for loops. Limit elimination to // candidates for which we can guarantee they always store to the same // memory location and not multiple locations in a loop. @@ -1410,12 +1550,11 @@ } break; } else { - int64_t InstWriteOffset, DepWriteOffset; - auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI, - DepWriteOffset, InstWriteOffset, BatchAA, &F); + auto OR = isOverwrite(KillingI, CurrentI, ResDefLoc, *CurrentLoc, DL, + TLI, CurrOffset, DefOffset, BatchAA, &F); // If Current does not write to the same object as KillingDef, check // the next candidate. - if (OR == OW_Unknown) { + if (OR == OW_Unknown || OR == OW_None) { continue; } else if (OR == OW_MaybePartial) { // If KillingDef only partially overwrites Current, check the next @@ -1424,6 +1563,7 @@ // which are less likely to be removable in the end. if (PartialLimit <= 1) { WalkerStepLimit -= 1; + LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n"); continue; } PartialLimit -= 1; @@ -1440,7 +1580,8 @@ // they cover all paths from EarlierAccess to any function exit. SmallPtrSet KillingDefs; KillingDefs.insert(KillingDef->getMemoryInst()); - MemoryAccess *EarlierAccess = Current; + ExMemoryAccess &ExEarlierAccess = CurrentAccess; + MemoryAccess *EarlierAccess = CurrentAccess.first; Instruction *EarlierMemInst = cast(EarlierAccess)->getMemoryInst(); LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " (" @@ -1517,46 +1658,64 @@ // Uses which may read the original MemoryDef mean we cannot eliminate the // original MD. Stop walk. - if (isReadClobber(*CurrentLoc, UseInst)) { - LLVM_DEBUG(dbgs() << " ... found read clobber\n"); - return None; - } + if (UseInst->getParent() == EarlierMemInst->getParent()) { + int64_t DefOffset = 0; + const Value *ResDefPtr = DefLoc.Ptr; - // For the KillingDef and EarlierAccess we only have to check if it reads - // the memory location. - // TODO: It would probably be better to check for self-reads before - // calling the function. - if (KillingDef == UseAccess || EarlierAccess == UseAccess) { - LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n"); - continue; + if (CurrentAccess.second.first.pointsToAliveValue()) { + ResDefPtr = CurrentAccess.second.first; + DefOffset = CurrentAccess.second.second; + } + + MemoryLocation ResDefLoc(ResDefPtr, DefLoc.Size, DefLoc.AATags); + if (isReadClobber(KillingI, ResDefLoc, DefOffset, UseInst)) { + LLVM_DEBUG(dbgs() << " ... found read clobber\n"); + return None; + } + } else { + if (isReadClobber(EarlierMemInst, *CurrentLoc, 0, UseInst)) { + LLVM_DEBUG(dbgs() << " ... found read clobber\n"); + return None; + } } - // Check all uses for MemoryDefs, except for defs completely overwriting - // the original location. Otherwise we have to check uses of *all* - // MemoryDefs we discover, including non-aliasing ones. Otherwise we might - // miss cases like the following - // 1 = Def(LoE) ; <----- EarlierDef stores [0,1] - // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] - // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. - // (The Use points to the *first* Def it may alias) - // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, - // stores [0,1] - if (MemoryDef *UseDef = dyn_cast(UseAccess)) { - if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) { - if (!isInvisibleToCallerAfterRet(DefUO) && - UseAccess != EarlierAccess) { - BasicBlock *MaybeKillingBlock = UseInst->getParent(); - if (PostOrderNumbers.find(MaybeKillingBlock)->second < - PostOrderNumbers.find(EarlierAccess->getBlock())->second) { - - LLVM_DEBUG(dbgs() - << " ... found killing def " << *UseInst << "\n"); - KillingDefs.insert(UseInst); + // For the KillingDef and EarlierAccess we only have to check if it + // reads + // the memory location. + // TODO: It would probably be better to check for self-reads before + // calling the function. + if (KillingDef == UseAccess || EarlierAccess == UseAccess) { + LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n"); + continue; + } + + // Check all uses for MemoryDefs, except for defs completely overwriting + // the original location. Otherwise we have to check uses of *all* + // MemoryDefs we discover, including non-aliasing ones. Otherwise we + // might + // miss cases like the following + // 1 = Def(LoE) ; <----- EarlierDef stores [0,1] + // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3] + // Use(2) ; MayAlias 2 *and* 1, loads [0, 3]. + // (The Use points to the *first* Def it may alias) + // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, + // stores [0,1] + if (MemoryDef *UseDef = dyn_cast(UseAccess)) { + if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) { + if (!isInvisibleToCallerAfterRet(DefUO) && + UseAccess != EarlierAccess) { + BasicBlock *MaybeKillingBlock = UseInst->getParent(); + if (PostOrderNumbers.find(MaybeKillingBlock)->second < + PostOrderNumbers.find(EarlierAccess->getBlock())->second) { + + LLVM_DEBUG(dbgs() + << " ... found killing def " << *UseInst << "\n"); + KillingDefs.insert(UseInst); + } } - } - } else - PushMemUses(UseDef); - } + } else + PushMemUses(UseDef); + } } // For accesses to locations visible after the function returns, make sure @@ -1582,7 +1741,7 @@ // post-dominates EarlierAccess. if (KillingBlocks.count(CommonPred)) { if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) - return {EarlierAccess}; + return {ExEarlierAccess}; return None; } @@ -1623,14 +1782,14 @@ return None; } NumCFGSuccess++; - return {EarlierAccess}; + return {ExEarlierAccess}; } return None; } // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is // potentially dead. - return {EarlierAccess}; + return {ExEarlierAccess}; } // Delete dead memory defs @@ -1824,10 +1983,12 @@ bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, - const TargetLibraryInfo &TLI) { + const TargetLibraryInfo &TLI, + AssumptionCache *AC) { bool MadeChange = false; - DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI); + const DataLayout &DL = F.getParent()->getDataLayout(); + DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, AC); // For each store: for (unsigned I = 0; I < State.MemDefs.size(); I++) { MemoryDef *KillingDef = State.MemDefs[I]; @@ -1847,41 +2008,45 @@ << *SI << "\n"); continue; } - MemoryLocation SILoc = *MaybeSILoc; + const MemoryLocation SILoc = *MaybeSILoc; assert(SILoc.Ptr && "SILoc should not be null"); const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr); - MemoryAccess *Current = KillingDef; LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " - << *Current << " (" << *SI << ")\n"); + << *KillingDef << " (" << *SI << ")\n"); unsigned ScanLimit = MemorySSAScanLimit; unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit; unsigned PartialLimit = MemorySSAPartialStoreLimit; // Worklist of MemoryAccesses that may be killed by KillingDef. - SetVector ToCheck; + SmallVector ToCheck; - if (SILocUnd) - ToCheck.insert(KillingDef->getDefiningAccess()); + if (SILocUnd) { + ExMemoryAccess ExKillingDef{KillingDef, + {const_cast(SILoc.Ptr), 0}}; + ToCheck.push_back(getExDefiningAccess(ExKillingDef, DL, DT, AC)); + } bool Shortend = false; bool IsMemTerm = State.isMemTerminatorInst(SI); // Check if MemoryAccesses in the worklist are killed by KillingDef. - for (unsigned I = 0; I < ToCheck.size(); I++) { - Current = ToCheck[I]; + while (!ToCheck.empty()) { + const ExMemoryAccess CurrentAccess = ToCheck.pop_back_val(); + MemoryAccess *Current = CurrentAccess.first; if (State.SkipStores.count(Current)) continue; - Optional Next = State.getDomMemoryDef( - KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit, - IsMemTerm, PartialLimit); + Optional Next = State.getDomMemoryDef( + KillingDef, CurrentAccess, SILoc, SILocUnd, ScanLimit, + WalkerStepLimit, IsMemTerm, PartialLimit); if (!Next) { LLVM_DEBUG(dbgs() << " finished walk\n"); continue; } - MemoryAccess *EarlierAccess = *Next; + ExMemoryAccess &ExEarlierAccess = *Next; + MemoryAccess *EarlierAccess = ExEarlierAccess.first; LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess); if (isa(EarlierAccess)) { LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n"); @@ -1895,14 +2060,15 @@ // strictly dominate our starting def. if (State.PostOrderNumbers[IncomingBlock] > State.PostOrderNumbers[PhiBlock]) - ToCheck.insert(IncomingAccess); + ToCheck.push_back(phiTransFromMemoryAccessTo( + ExEarlierAccess, IncomingAccess, DL, DT, AC)); } continue; } auto *NextDef = cast(EarlierAccess); Instruction *NI = NextDef->getMemoryInst(); LLVM_DEBUG(dbgs() << " (" << *NI << ")\n"); - ToCheck.insert(NextDef->getDefiningAccess()); + ToCheck.push_back(getExDefiningAccess(ExEarlierAccess, DL, DT, AC)); NumGetDomMemoryDefPassed++; if (!DebugCounter::shouldExecute(MemorySSACounter)) @@ -1910,9 +2076,22 @@ MemoryLocation NILoc = *State.getLocForWriteEx(NI); + // Check if NI overwrites SI. + int64_t ResOffset = 0; + int64_t CurOffset = 0; + const Value *ResSIPtr = SILoc.Ptr; + + if (CurrentAccess.second.first.pointsToAliveValue()) { + ResSIPtr = CurrentAccess.second.first; + ResOffset = CurrentAccess.second.second; + } + + MemoryLocation ResSILoc(ResSIPtr, SILoc.Size, SILoc.AATags); + const Value *ResSILocUnd = getUnderlyingObject(ResSIPtr); + if (IsMemTerm) { const Value *NIUnd = getUnderlyingObject(NILoc.Ptr); - if (SILocUnd != NIUnd) + if (ResSILocUnd != NIUnd) continue; LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI << "\n KILLER: " << *SI << '\n'); @@ -1920,17 +2099,15 @@ ++NumFastStores; MadeChange = true; } else { - // Check if NI overwrites SI. - int64_t InstWriteOffset, DepWriteOffset; OverwriteResult OR = - isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset, - InstWriteOffset, State.BatchAA, &F); + isOverwrite(SI, NI, ResSILoc, NILoc, State.DL, TLI, CurOffset, + ResOffset, State.BatchAA, &F); if (OR == OW_MaybePartial) { auto Iter = State.IOLs.insert( std::make_pair( NI->getParent(), InstOverlapIntervalsTy())); auto &IOL = Iter.first->second; - OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset, + OR = isPartialOverwrite(ResSILoc, NILoc, CurOffset, ResOffset, NI, IOL); } @@ -1942,7 +2119,7 @@ // TODO: implement tryToMergeParialOverlappingStores using MemorySSA. if (Earlier && Later && DT.dominates(Earlier, Later)) { if (Constant *Merged = tryToMergePartialOverlappingStores( - Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL, + Earlier, Later, ResOffset, CurOffset, State.DL, State.BatchAA, &DT)) { // Update stored value of earlier store to merged constant. @@ -2001,8 +2178,9 @@ DominatorTree &DT = AM.getResult(F); MemorySSA &MSSA = AM.getResult(F).getMSSA(); PostDominatorTree &PDT = AM.getResult(F); + AssumptionCache &AC = AM.getResult(F); - bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI); + bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, &AC); #ifdef LLVM_ENABLE_STATS if (AreStatisticsEnabled()) @@ -2042,8 +2220,10 @@ MemorySSA &MSSA = getAnalysis().getMSSA(); PostDominatorTree &PDT = getAnalysis().getPostDomTree(); + AssumptionCache &AC = + getAnalysis().getAssumptionCache(F); - bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI); + bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, &AC); #ifdef LLVM_ENABLE_STATS if (AreStatisticsEnabled()) @@ -2060,11 +2240,13 @@ AU.addRequired(); AU.addPreserved(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); } };