diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1287,7 +1287,8 @@
                       << *Def->getMemoryInst()
                       << ") is at the end the function \n");
 
-    auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst());
+    Instruction *DefInst = Def->getMemoryInst();
+    auto MaybeLoc = getLocForWriteEx(DefInst);
     if (!MaybeLoc) {
       LLVM_DEBUG(dbgs() << "  ... could not get location for write.\n");
       return false;
@@ -1319,7 +1320,7 @@
       // TODO: Checking for aliasing is expensive. Consider reducing the amount
       // of times this is called and/or caching it.
       Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
-      if (isReadClobber(*MaybeLoc, UseInst)) {
+      if (isReadClobber(DefInst, *MaybeLoc, 0, UseInst)) {
         LLVM_DEBUG(dbgs() << "  ... hit read clobber " << *UseInst << ".\n");
         return false;
       }
@@ -1386,7 +1387,10 @@
   }
 
   // Returns true if \p Use may read from \p DefLoc.
-  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
+  bool isReadClobber(Instruction *DefInst, const MemoryLocation &DefLoc,
+                     const int64_t DefOffset, Instruction *UseInst,
+                     Optional<MemoryLocation> UseLoc = None,
+                     const int64_t UseOffset = 0) {
     if (isNoopIntrinsic(UseInst))
       return false;
 
@@ -1402,11 +1406,27 @@
       if (CB->onlyAccessesInaccessibleMemory())
         return false;
 
+    if (!UseLoc) {
+      UseLoc = getLocForWriteEx(UseInst);
+      if (!UseLoc)
+        UseLoc = MemoryLocation::getOrNone(UseInst);
+    }
+
+    int64_t DummyDefOffset = DefOffset;
+    int64_t DummyUseOffset = UseOffset;
+    if (UseLoc &&
+        isOverwrite(DefInst, UseInst, DefLoc, *UseLoc, DummyUseOffset,
+                    DummyDefOffset) == OW_None)
+      return false;
+
     // NOTE: For calls, the number of stores removed could be slightly improved
     // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to
     // be expensive compared to the benefits in practice. For now, avoid more
     // expensive analysis to limit compile-time.
-    return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
+    if (DefOffset == UseOffset)
+      return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
+
+    return true;
   }
 
   /// Returns true if a dependency between \p Current and \p KillingDef is
@@ -1556,21 +1576,46 @@
         return None;
       }
 
+      // If Current does not have an analyzable write location, skip it
+      int64_t CurrOffset = 0;
+      CurrentLoc = getLocForWriteEx(CurrentI);
+      if (!CurrentLoc)
+        continue;
+
       // If Current is known to be on path that reads DefLoc or is a read
       // clobber, bail out, as the path is not profitable. We skip this check
       // for intrinsic calls, because the code knows how to handle memcpy
       // intrinsics.
-      if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
+      if (!isa<IntrinsicInst>(CurrentI) &&
+          isReadClobber(KillingI, KillingLoc, 0, CurrentI, CurrentLoc, 0))
         return None;
 
-      // Quick check if there are direct uses that are read-clobbers.
-      if (any_of(
-          CurrentAccess->uses(), [this, &KillingLoc, StartAccess](Use &U) {
-            if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
-              return !MSSA.dominates(StartAccess, UseOrDef) &&
-                     isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
+      if (any_of(CurrentAccess->uses(), [this, KillingI, CurrentI, &KillingLoc,
+                                         PhiTransKillingLocAndOffset,
+                                         StartAccess](Use &U) {
+            if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser())) {
+              auto *UseInst = UseOrDef->getMemoryInst();
+              if (MSSA.dominates(StartAccess, UseOrDef))
                 return false;
-          })) {
+              // Use phi translated value for KillingLoc (from basic
+              // block of KillingI to CurrentI) only if UseInst is from
+              // the same basic block as CurrentI.
+              if (PhiTransKillingLocAndOffset &&
+                  UseInst->getParent() == CurrentI->getParent()) {
+                if (!isReadClobber(
+                        KillingI, (*PhiTransKillingLocAndOffset).first,
+                        (*PhiTransKillingLocAndOffset).second, UseInst)) {
+                  return false;
+                }
+              } else {
+                if (!isReadClobber(KillingI, KillingLoc, 0, UseInst)) {
+                  return false;
+                }
+              }
+              return true;
+            }
+            return false;
+          })) {
        LLVM_DEBUG(dbgs() << "  ... found a read clobber\n");
        return None;
      }
@@ -1580,12 +1625,6 @@
       if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI))
         continue;
 
-      // If Current does not have an analyzable write location, skip it
-      int64_t CurrOffset = 0;
-      CurrentLoc = getLocForWriteEx(CurrentI);
-      if (!CurrentLoc)
-        continue;
-
       // AliasAnalysis does not account for loops. Limit elimination to
       // candidates for which we can guarantee they always store to the same
       // memory location and not located in different loops.
@@ -1705,11 +1744,23 @@
         return None;
       }
 
-      // Uses which may read the original MemoryDef mean we cannot eliminate the
-      // original MD. Stop walk.
-      if (isReadClobber(MaybeDeadLoc, UseInst)) {
-        LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
-        return None;
+      // Uses which may read the original KillingDef mean we cannot eliminate
+      // the original MD. Stop walk.
+      // Use phi translated value for KillingLoc (from basic block of
+      // KillingI to CurrentI) only if UseInst is from the same basic
+      // block as CurrentI.
+      if (PhiTransKillingLocAndOffset &&
+          UseInst->getParent() == MaybeDeadI->getParent()) {
+        if (isReadClobber(KillingI, (*PhiTransKillingLocAndOffset).first,
+                          (*PhiTransKillingLocAndOffset).second, UseInst)) {
+          LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
+          return None;
+        }
+      } else {
+        if (isReadClobber(MaybeDeadI, MaybeDeadLoc, 0, UseInst)) {
+          LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
+          return None;
+        }
       }
 
       // If this worklist walks back to the original memory access (and the
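Note (reviewer illustration, not part of the patch): a minimal C++ sketch of the two situations the extended isReadClobber signature appears to target. Both functions below (f and g) are hypothetical examples written for this note, not taken from the patch or its tests, and the simple constant-offset case in f may already be caught by plain alias analysis; it is only meant to show the shape of the new OW_None check.

// Case 1: an intervening read at a provably disjoint offset. With the new
// UseLoc/offset parameters, isOverwrite(...) == OW_None lets isReadClobber
// report "no clobber" instead of conservatively blocking elimination.
int f(int *a) {
  a[0] = 1;      // candidate dead store
  int x = a[1];  // reads a disjoint location; should not count as a clobber
  a[0] = 2;      // killing store fully overwrites the first store
  return x;
}

// Case 2 (assumed motivation for PhiTransKillingLocAndOffset): the killing
// store's address involves an induction PHI, so KillingLoc has to be phi
// translated into the candidate's block before the two stores can be matched.
void g(int *a, int n) {
  a[0] = 0;        // candidate dead store
  int i = 0;
  do {
    a[i] = 1;      // the first iteration overwrites a[0]; matching it against
    ++i;           // the store above requires translating &a[i] to &a[0]
  } while (i < n);
}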