diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1174,7 +1174,8 @@
                       << *Def->getMemoryInst()
                       << ") is at the end the function \n");
 
-    auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst());
+    Instruction *DefInst = Def->getMemoryInst();
+    auto MaybeLoc = getLocForWriteEx(DefInst);
     if (!MaybeLoc) {
       LLVM_DEBUG(dbgs() << "  ... could not get location for write.\n");
       return false;
@@ -1206,7 +1207,7 @@
       // TODO: Checking for aliasing is expensive. Consider reducing the amount
       // of times this is called and/or caching it.
       Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
-      if (isReadClobber(*MaybeLoc, UseInst)) {
+      if (isReadClobber(DefInst, *MaybeLoc, 0, UseInst)) {
         LLVM_DEBUG(dbgs() << "  ... hit read clobber " << *UseInst << ".\n");
         return false;
       }
@@ -1273,7 +1274,10 @@
   }
 
   // Returns true if \p Use may read from \p DefLoc.
-  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
+  bool isReadClobber(Instruction *DefInst, const MemoryLocation &DefLoc,
+                     int64_t DefOffset, Instruction *UseInst,
+                     Optional<MemoryLocation> UseLoc = None,
+                     int64_t UseOffset = 0) {
     if (isNoopIntrinsic(UseInst))
       return false;
 
@@ -1289,11 +1293,25 @@
       if (CB->onlyAccessesInaccessibleMemory())
         return false;
 
+    if (!UseLoc) {
+      UseLoc = getLocForWriteEx(UseInst);
+      if (!UseLoc)
+        UseLoc = MemoryLocation::getOrNone(UseInst);
+    }
+
+    if (UseLoc &&
+        isOverwrite(DefInst, UseInst, DefLoc, *UseLoc, UseOffset, DefOffset) ==
+            OW_None)
+      return false;
+
     // NOTE: For calls, the number of stores removed could be slightly improved
     // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to
    // be expensive compared to the benefits in practice. For now, avoid more
     // expensive analysis to limit compile-time.
-    return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
+    if (DefOffset == 0 && UseOffset == 0)
+      return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
+
+    return true;
   }
 
   /// Returns true if a dependency between \p Current and \p KillingDef is
@@ -1425,18 +1443,39 @@
         return None;
       }
 
+      // If Current does not have an analyzable write location, skip it
+      int64_t CurrOffset = 0;
+      CurrentLoc = getLocForWriteEx(CurrentI);
+      if (!CurrentLoc)
+        continue;
+
       // If Current is known to be on path that reads DefLoc or is a read
       // clobber, bail out, as the path is not profitable. We skip this check
       // for intrinsic calls, because the code knows how to handle memcpy
       // intrinsics.
-      if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
+      if (!isa<IntrinsicInst>(CurrentI) &&
+          isReadClobber(KillingI, ResDefLoc, DefOffset, CurrentI, CurrentLoc,
+                        CurrOffset))
         return None;
 
       // Quick check if there are direct uses that are read-clobbers.
-      if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
-            if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
-              return !MSSA.dominates(StartAccess, UseOrDef) &&
-                     isReadClobber(DefLoc, UseOrDef->getMemoryInst());
+      if (any_of(Current->uses(), [this, KillingI, CurrentI, &DefLoc,
+                                   &ResDefLoc, DefOffset, StartAccess](Use &U) {
+            if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser())) {
+              auto *UseInst = UseOrDef->getMemoryInst();
+              if (MSSA.dominates(StartAccess, UseOrDef))
+                return false;
+              if (UseInst->getParent() == CurrentI->getParent()) {
+                if (!isReadClobber(KillingI, ResDefLoc, DefOffset, UseInst)) {
+                  return false;
+                }
+              } else {
+                if (!isReadClobber(KillingI, DefLoc, 0, UseInst)) {
+                  return false;
+                }
+              }
+              return true;
+            }
             return false;
           })) {
         LLVM_DEBUG(dbgs() << "   ... found a read clobber\n");
@@ -1448,11 +1487,6 @@
       if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI))
         continue;
 
-      // If Current does not have an analyzable write location, skip it
-      CurrentLoc = getLocForWriteEx(CurrentI);
-      if (!CurrentLoc)
-        continue;
-
       // AliasAnalysis does not account for loops. Limit elimination to
       // candidates for which we can guarantee they always store to the same
       // memory location and not located in different loops.
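The key addition above is the early exit in isReadClobber: when the use's memory location is known and isOverwrite reports that the offset-adjusted ranges of the use and the def do not overlap (OW_None), the use cannot be a read clobber, so the more expensive alias query is skipped. A minimal standalone sketch of that disjointness idea, assuming both accesses refer to the same base object and using plain integers for the sizes and offsets that DSE derives from MemoryLocation and its offset tracking; the names ByteRange and rangesDisjoint are illustrative and not part of LLVM:

#include <cstdint>

// One memory access described relative to a shared base object.
struct ByteRange {
  int64_t Offset; // byte offset from the base object
  int64_t Size;   // access size in bytes
};

// True when the two accesses cannot touch a common byte; this mirrors the
// OW_None outcome the patch checks for before falling back to alias analysis.
static bool rangesDisjoint(const ByteRange &Def, const ByteRange &Use) {
  return Use.Offset + Use.Size <= Def.Offset ||
         Def.Offset + Def.Size <= Use.Offset;
}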