diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -468,8 +469,9 @@ // If the start pointers are the same, we just have to compare sizes to see if // the later store was larger than the earlier store. if (P1 == P2 || AA.isMustAlias(P1, P2)) { - // Make sure that the Later size is >= the Earlier size. - if (LaterSize >= EarlierSize) + if (EarlierOff >= LaterOff && + LaterSize >= EarlierSize && + uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize) return OW_Complete; } @@ -492,10 +494,13 @@ // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. - EarlierOff = 0; - LaterOff = 0; - const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL); - const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL); + int64_t ExtraEarlierOff = 0; + int64_t ExtraLaterOff = 0; + const Value *BP1 = GetPointerBaseWithConstantOffset(P1, ExtraEarlierOff, DL); + const Value *BP2 = GetPointerBaseWithConstantOffset(P2, ExtraLaterOff, DL); + + EarlierOff += ExtraEarlierOff; + LaterOff += ExtraLaterOff; // If the base pointers still differ, we have two completely different stores. if (BP1 != BP2) @@ -1388,7 +1393,8 @@ // to by the earlier one. 
if (isRemovable(DepWrite) && !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { - int64_t InstWriteOffset, DepWriteOffset; + int64_t InstWriteOffset = 0; + int64_t DepWriteOffset = 0; OverwriteResult OR = isOverwrite(Inst, DepWrite, Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset, *AA, BB.getParent()); @@ -1570,6 +1576,95 @@ return false; } +using PhiTransPtr = std::pair; +using ExMemoryAccess = std::pair; + +static PhiTransPtr phiTranslatePtr(const PhiTransPtr &Ptr, BasicBlock *FromBB, + BasicBlock *ToBB, const DataLayout &DL, + DominatorTree &DT, AssumptionCache *AC) { + PhiTransPtr ResPtr; + DenseMap Visited; + SmallVector, 16> WorkList; + + if (FromBB == ToBB) { + return Ptr; + } + + WorkList.push_back({FromBB, Ptr}); + + while (!WorkList.empty()) { + auto &CurrNode = WorkList.back(); + WorkList.pop_back(); + BasicBlock *CurrBB = CurrNode.first; + const PhiTransPtr &CurrPtr = CurrNode.second; + + for (pred_iterator PI = pred_begin(CurrBB), E = pred_end(CurrBB); PI != E; + ++PI) { + BasicBlock *PredBB = *PI; + int64_t Offset = 0; + Value *BasePtr = + GetPointerBaseWithConstantOffset(CurrPtr.first, Offset, DL); + Offset += CurrPtr.second; + PHITransAddr TransAddr{BasePtr, DL, AC}; + + // TODO: + if (!DT.dominates(ToBB, PredBB)) + continue; + + if (TransAddr.NeedsPHITranslationFromBlock(CurrBB) && + (!TransAddr.IsPotentiallyPHITranslatable() || + TransAddr.PHITranslateValue(CurrBB, PredBB, &DT, false))) + return PhiTransPtr{}; + + auto Inserted = Visited.try_emplace(PredBB, TransAddr.getAddr(), Offset); + auto &TransPtr = Inserted.first->second; + if (!Inserted.second) { + if (TransAddr.getAddr() != TransPtr.first || + Offset != TransPtr.second) + // We already visited this block before. If it was with a different + // address - bail out! 
+ return PhiTransPtr{}; + continue; + } + + if (PredBB == ToBB) { + ResPtr = TransPtr; + continue; + } + + WorkList.push_back({PredBB, TransPtr}); + } + } + + assert(ResPtr.first.pointsToAliveValue() && + "PHI translation is expected to complete successfully"); + return ResPtr; +} + +static ExMemoryAccess +phiTransFromMemoryAccessTo(const ExMemoryAccess &FromAccess, + MemoryAccess *ToAccess, const DataLayout &DL, + DominatorTree &DT, AssumptionCache *AC) { + PhiTransPtr ResAddr; + + if (FromAccess.second.first.pointsToAliveValue()) { + + ResAddr = phiTranslatePtr(FromAccess.second, FromAccess.first->getBlock(), + ToAccess->getBlock(), DL, DT, AC); + } + + return std::make_pair(ToAccess, ResAddr); +} + +static ExMemoryAccess getExDefiningAccess(const ExMemoryAccess &CurrAccess, + const DataLayout &DL, DominatorTree &DT, + AssumptionCache *AC) { + assert(isa(CurrAccess.first) && "TODO"); + MemoryAccess *Next = + cast(CurrAccess.first)->getDefiningAccess(); + return phiTransFromMemoryAccessTo(CurrAccess, Next, DL, DT, AC); +} + struct DSEState { Function &F; AliasAnalysis &AA; @@ -1588,6 +1683,7 @@ PostDominatorTree &PDT; const TargetLibraryInfo &TLI; const DataLayout &DL; + AssumptionCache *AC; // All MemoryDefs that potentially could kill other MemDefs. 
SmallVector MemDefs; @@ -1623,14 +1719,14 @@ }; DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, - PostDominatorTree &PDT, const TargetLibraryInfo &TLI) + PostDominatorTree &PDT, const TargetLibraryInfo &TLI, AssumptionCache *AC) : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI), - DL(F.getParent()->getDataLayout()) {} + DL(F.getParent()->getDataLayout()), AC(AC) {} static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, - const TargetLibraryInfo &TLI) { - DSEState State(F, AA, MSSA, DT, PDT, TLI); + const TargetLibraryInfo &TLI, AssumptionCache *AC) { + DSEState State(F, AA, MSSA, DT, PDT, TLI, AC); // Collect blocks with throwing instructions not modeled in MemorySSA and // alloc-like objects. unsigned PO = 0; @@ -1741,7 +1837,8 @@ if (CB->onlyAccessesInaccessibleMemory()) return false; - int64_t InstWriteOffset, DepWriteOffset; + int64_t InstWriteOffset = 0; + int64_t DepWriteOffset = 0; if (auto CC = getLocForWriteEx(UseInst)) return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset, InstWriteOffset, BatchAA, &F) == OW_Complete; @@ -1840,7 +1937,8 @@ getUnderlyingObject(MaybeTermLoc->first.Ptr)) return false; - int64_t InstWriteOffset, DepWriteOffset; + int64_t InstWriteOffset = 0; + int64_t DepWriteOffset = 0; return MaybeTermLoc->second || isOverwrite(MaybeTerm, AccessI, MaybeTermLoc->first, Loc, DL, TLI, DepWriteOffset, InstWriteOffset, BatchAA, @@ -1900,8 +1998,8 @@ // such MemoryDef, return None. The returned value may not (completely) // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing // MemoryUse (read). 
- Optional - getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess, + Optional + getDomMemoryDef(MemoryDef *KillingDef, const ExMemoryAccess &ExStartAccess, MemoryLocation DefLoc, const Value *DefUO, CheckCache &Cache, unsigned &ScanLimit, unsigned &WalkerStepLimit, bool IsMemTerm, unsigned &PartialLimit) { @@ -1910,14 +2008,15 @@ return None; } - MemoryAccess *Current = StartAccess; + MemoryAccess * const StartAccess = ExStartAccess.first; Instruction *KillingI = KillingDef->getMemoryInst(); - bool StepAgain; LLVM_DEBUG(dbgs() << " trying to get dominating access\n"); + ExMemoryAccess CurrentAccess = ExStartAccess; // Find the next clobbering Mod access for DefLoc, starting at StartAccess. - do { - StepAgain = false; + for (; true; + CurrentAccess = getExDefiningAccess(CurrentAccess, DL, DT, AC)) { + MemoryAccess *Current = CurrentAccess.first; LLVM_DEBUG({ dbgs() << " visiting " << *Current; if (!MSSA.isLiveOnEntryDef(Current) && isa(Current)) @@ -1947,7 +2046,7 @@ // caller is responsible for traversing them. if (isa(Current)) { LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n"); - return Current; + return CurrentAccess; } // Below, check if CurrentDef is a valid candidate to be eliminated by @@ -1956,8 +2055,6 @@ Instruction *CurrentI = CurrentDef->getMemoryInst(); if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); continue; } @@ -2001,16 +2098,12 @@ // If Current cannot be analyzed or is not removable, check the next // candidate. 
if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI)) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); continue; } // If Current does not have an analyzable write location, skip it auto CurrentLoc = getLocForWriteEx(CurrentI); if (!CurrentLoc) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); continue; } @@ -2019,45 +2112,53 @@ // the next candidate if the current Current does not write the same // underlying object as the terminator. if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); + continue; } - continue; + break; } else { // AliasAnalysis does not account for loops. Limit elimination to // candidates for which we can guarantee they always store to the same // memory location and not multiple locations in a loop. if (Current->getBlock() != KillingDef->getBlock() && !IsGuaranteedLoopInvariant(const_cast(CurrentLoc->Ptr))) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); WalkerStepLimit -= 1; continue; } - int64_t InstWriteOffset, DepWriteOffset; - auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI, - DepWriteOffset, InstWriteOffset, BatchAA, &F); + int64_t DefOffset = 0; + int64_t CurrOffset = 0; + const Value *ResDefPtr = DefLoc.Ptr; + + if (CurrentAccess.second.first.pointsToAliveValue()) { + ResDefPtr = CurrentAccess.second.first; + DefOffset = CurrentAccess.second.second; + } + + MemoryLocation ResDefLoc(ResDefPtr, DefLoc.Size, DefLoc.AATags); + + auto OR = + isOverwrite(KillingI, CurrentI, ResDefLoc, *CurrentLoc, DL, TLI, + CurrOffset, DefOffset, BatchAA, &F); // If Current does not write to the same object as KillingDef, check // the next candidate. 
if (OR == OW_Unknown) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); + continue; } else if (OR == OW_MaybePartial) { // If KillingDef only partially overwrites Current, check the next // candidate if the partial step limit is exceeded. This aggressively // limits the number of candidates for partial store elimination, // which are less likely to be removable in the end. if (PartialLimit <= 1) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); WalkerStepLimit -= 1; continue; } PartialLimit -= 1; + break; + } else { + break; } } - } while (StepAgain); + }; // Accesses to objects accessible after the function returns can only be // eliminated if the access is killed along all paths to the exit. Collect @@ -2065,7 +2166,8 @@ // they cover all paths from EarlierAccess to any function exit. SmallPtrSet KillingDefs; KillingDefs.insert(KillingDef->getMemoryInst()); - MemoryAccess *EarlierAccess = Current; + ExMemoryAccess &ExEarlierAccess = CurrentAccess; + MemoryAccess *EarlierAccess = CurrentAccess.first; Instruction *EarlierMemInst = cast(EarlierAccess)->getMemoryInst(); LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " (" @@ -2221,7 +2323,7 @@ // post-dominates EarlierAccess. if (KillingBlocks.count(CommonPred)) { if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) - return {EarlierAccess}; + return {ExEarlierAccess}; return None; } @@ -2262,7 +2364,7 @@ return None; } NumCFGSuccess++; - return {EarlierAccess}; + return {ExEarlierAccess}; } return None; } @@ -2270,7 +2372,7 @@ // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is // potentially dead. 
Cache.KnownNoReads.insert(KnownNoReads.begin(), KnownNoReads.end()); - return {EarlierAccess}; + return {ExEarlierAccess}; } // Delete dead memory defs @@ -2461,13 +2563,15 @@ } }; +// ToBB must dominate FromBB bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, - const TargetLibraryInfo &TLI) { + const TargetLibraryInfo &TLI, + AssumptionCache *AC) { bool MadeChange = false; - - DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI); + const DataLayout &DL = F.getParent()->getDataLayout(); + DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, AC); // For each store: for (unsigned I = 0; I < State.MemDefs.size(); I++) { MemoryDef *KillingDef = State.MemDefs[I]; @@ -2487,7 +2591,7 @@ << *SI << "\n"); continue; } - MemoryLocation SILoc = *MaybeSILoc; + const MemoryLocation SILoc = *MaybeSILoc; assert(SILoc.Ptr && "SILoc should not be null"); const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr); @@ -2500,7 +2604,6 @@ continue; } - MemoryAccess *Current = KillingDef; LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " << *KillingDef << " (" << *SI << ")\n"); @@ -2508,21 +2611,24 @@ unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit; unsigned PartialLimit = MemorySSAPartialStoreLimit; // Worklist of MemoryAccesses that may be killed by KillingDef. - SetVector ToCheck; - ToCheck.insert(KillingDef->getDefiningAccess()); + SmallVector ToCheck; + ExMemoryAccess ExKillingDef{KillingDef, {const_cast(SILoc.Ptr), 0}}; + ToCheck.push_back(getExDefiningAccess(ExKillingDef, DL, DT, AC)); if (!SILocUnd) continue; bool IsMemTerm = State.isMemTerminatorInst(SI); DSEState::CheckCache Cache; // Check if MemoryAccesses in the worklist are killed by KillingDef. 
- for (unsigned I = 0; I < ToCheck.size(); I++) { - Current = ToCheck[I]; + while (!ToCheck.empty()) { + const ExMemoryAccess CurrentAccess = ToCheck.pop_back_val(); + MemoryAccess *Current = CurrentAccess.first; + if (State.SkipStores.count(Current)) continue; - Optional Next = State.getDomMemoryDef( - KillingDef, Current, SILoc, SILocUnd, Cache, ScanLimit, + Optional Next = State.getDomMemoryDef( + KillingDef, CurrentAccess, SILoc, SILocUnd, Cache, ScanLimit, WalkerStepLimit, IsMemTerm, PartialLimit); if (!Next) { @@ -2530,7 +2636,8 @@ continue; } - MemoryAccess *EarlierAccess = *Next; + ExMemoryAccess &ExEarlierAccess = *Next; + MemoryAccess *EarlierAccess = ExEarlierAccess.first; LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess); if (isa(EarlierAccess)) { LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n"); @@ -2544,14 +2651,15 @@ // strictly dominate our starting def. if (State.PostOrderNumbers[IncomingBlock] > State.PostOrderNumbers[PhiBlock]) - ToCheck.insert(IncomingAccess); + ToCheck.push_back(phiTransFromMemoryAccessTo( + ExEarlierAccess, IncomingAccess, DL, DT, AC)); } continue; } MemoryDef *NextDef = dyn_cast(EarlierAccess); Instruction *NI = NextDef->getMemoryInst(); LLVM_DEBUG(dbgs() << " (" << *NI << ")\n"); - ToCheck.insert(NextDef->getDefiningAccess()); + ToCheck.push_back(getExDefiningAccess(ExEarlierAccess, DL, DT, AC)); NumGetDomMemoryDefPassed++; if (!DebugCounter::shouldExecute(MemorySSACounter)) @@ -2559,9 +2667,22 @@ MemoryLocation NILoc = *State.getLocForWriteEx(NI); + // Check if NI overwrites SI. 
+ int64_t ResOffset = 0; + int64_t CurOffset = 0; + const Value *ResSIPtr = SILoc.Ptr; + + if (CurrentAccess.second.first.pointsToAliveValue()) { + ResSIPtr = CurrentAccess.second.first; + ResOffset = CurrentAccess.second.second; + } + + MemoryLocation ResSILoc(ResSIPtr, SILoc.Size, SILoc.AATags); + const Value *ResSILocUnd = getUnderlyingObject(ResSIPtr); + if (IsMemTerm) { const Value *NIUnd = getUnderlyingObject(NILoc.Ptr); - if (SILocUnd != NIUnd) + if (ResSILocUnd != NIUnd) continue; LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI << "\n KILLER: " << *SI << '\n'); @@ -2569,17 +2690,15 @@ ++NumFastStores; MadeChange = true; } else { - // Check if NI overwrites SI. - int64_t InstWriteOffset, DepWriteOffset; OverwriteResult OR = - isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset, - InstWriteOffset, State.BatchAA, &F); + isOverwrite(SI, NI, ResSILoc, NILoc, State.DL, TLI, CurOffset, + ResOffset, State.BatchAA, &F); if (OR == OW_MaybePartial) { auto Iter = State.IOLs.insert( std::make_pair( NI->getParent(), InstOverlapIntervalsTy())); auto &IOL = Iter.first->second; - OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset, + OR = isPartialOverwrite(ResSILoc, NILoc, CurOffset, ResOffset, NI, IOL); } @@ -2591,7 +2710,7 @@ // TODO: implement tryToMergeParialOverlappingStores using MemorySSA. if (Earlier && Later && DT.dominates(Earlier, Later)) { if (Constant *Merged = tryToMergePartialOverlappingStores( - Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL, + Earlier, Later, ResOffset, CurOffset, State.DL, State.BatchAA, &DT)) { // Update stored value of earlier store to merged constant. 
@@ -2637,13 +2756,14 @@
   AliasAnalysis &AA = AM.getResult<AAManager>(F);
   const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
   bool Changed = false;
   if (EnableMemorySSA) {
     MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
     PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
 
-    Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI);
+    Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI, &AC);
   } else {
     MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
@@ -2694,8 +2814,10 @@
     MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
     PostDominatorTree &PDT =
         getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+    AssumptionCache &AC =
+        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
 
-    Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI);
+    Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI, &AC);
   } else {
     MemoryDependenceResults &MD =
         getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
@@ -2723,8 +2845,10 @@
   if (EnableMemorySSA) {
     AU.addRequired<MemorySSAWrapperPass>();
     AU.addRequired<PostDominatorTreeWrapperPass>();
+    AU.addRequired<AssumptionCacheTracker>();
     AU.addPreserved<MemorySSAWrapperPass>();
     AU.addPreserved<PostDominatorTreeWrapperPass>();
+    AU.addPreserved<AssumptionCacheTracker>();
   } else {
     AU.addRequired<MemoryDependenceWrapperPass>();
     AU.addPreserved<MemoryDependenceWrapperPass>();