Index: llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -33,6 +33,8 @@
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
@@ -447,8 +449,8 @@
 isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
             const MemoryLocation &Later, const MemoryLocation &Earlier,
             const DataLayout &DL, const TargetLibraryInfo &TLI,
-            int64_t &EarlierOff, int64_t &LaterOff, AATy &AA,
-            const Function *F) {
+            int64_t &EarlierOff, int64_t &LaterOff, AATy &AA, const Function *F,
+            ScalarEvolution &SE) {
   // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
   // get imprecise values here, though (except for unknown sizes).
   if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
@@ -487,6 +489,31 @@
     if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
       return OW_Complete;
 
+  // If we hit a partial alias, we may still have a full overwrite.
+  AliasResult AAR = AA.alias(Later, Earlier);
+  if (AAR == AliasResult::PartialAlias) {
+    // ScalarEvolution lets us reason in detail about the two pointers.
+    const SCEV *EarlierSCEV = SE.getSCEV(const_cast<Value *>(P1));
+    const SCEV *LaterSCEV = SE.getSCEV(const_cast<Value *>(P2));
+    // Take the difference to learn how far apart the two stores are.
+    const SCEV *Minus = SE.getMinusSCEV(EarlierSCEV, LaterSCEV);
+    // A constant difference means the base pointer is the same.
+    if (Minus->getSCEVType() == SCEVTypes::scConstant) {
+      if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Minus)) {
+        APInt Diff = SC->getAPInt();
+        // We expect a non-negative difference, meaning that the earlier store
+        // starts at or after the later store in the memory layout.
+        if (Diff.isNonNegative())
+          // The later access completely overlaps the earlier store if and
+          // only if the earlier store is completely contained inside the
+          // later store: here we know by how much the earlier store is offset
+          // relative to the later store, and we know both sizes.
+          if ((Diff.getLimitedValue() + EarlierSize) <= LaterSize)
+            return OW_Complete;
+      }
+    }
+  }
+
   // Okay, we have stores to two completely different pointers.  Try to
   // decompose the pointer into a "base + constant_offset" form.  If the base
   // pointers are equal, then we can reason about the two stores.
@@ -1299,7 +1326,8 @@
 
 static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
                                 MemoryDependenceResults *MD, DominatorTree *DT,
-                                const TargetLibraryInfo *TLI) {
+                                const TargetLibraryInfo *TLI,
+                                ScalarEvolution *SE) {
   const DataLayout &DL = BB.getModule()->getDataLayout();
   bool MadeChange = false;
 
@@ -1412,9 +1440,9 @@
       if (isRemovable(DepWrite) &&
           !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
         int64_t InstWriteOffset, DepWriteOffset;
-        OverwriteResult OR = isOverwrite(Inst, DepWrite, Loc, DepLoc, DL, *TLI,
-                                         DepWriteOffset, InstWriteOffset, *AA,
-                                         BB.getParent());
+        OverwriteResult OR =
+            isOverwrite(Inst, DepWrite, Loc, DepLoc, DL, *TLI, DepWriteOffset,
+                        InstWriteOffset, *AA, BB.getParent(), *SE);
         if (OR == OW_MaybePartial)
           OR = isPartialOverwrite(Loc, DepLoc, DepWriteOffset, InstWriteOffset,
                                   DepWrite, IOL);
@@ -1508,13 +1536,14 @@
 
 static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,
                                 MemoryDependenceResults *MD, DominatorTree *DT,
-                                const TargetLibraryInfo *TLI) {
+                                const TargetLibraryInfo *TLI,
+                                ScalarEvolution *SE) {
   bool MadeChange = false;
   for (BasicBlock &BB : F)
     // Only check non-dead blocks. Dead blocks may have strange pointer
     // cycles that will confuse alias analysis.
     if (DT->isReachableFromEntry(&BB))
-      MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI);
+      MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI, SE);
   return MadeChange;
 }
 
@@ -1609,6 +1638,7 @@
   MemorySSA &MSSA;
   DominatorTree &DT;
   PostDominatorTree &PDT;
+  ScalarEvolution &SE;
   const TargetLibraryInfo &TLI;
   const DataLayout &DL;
 
@@ -1634,14 +1664,15 @@
   DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
 
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
-           PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
-      : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
-        DL(F.getParent()->getDataLayout()) {}
+           PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
+           ScalarEvolution &SE)
+      : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), SE(SE),
+        TLI(TLI), DL(F.getParent()->getDataLayout()) {}
 
   static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                       DominatorTree &DT, PostDominatorTree &PDT,
-                      const TargetLibraryInfo &TLI) {
-    DSEState State(F, AA, MSSA, DT, PDT, TLI);
+                      const TargetLibraryInfo &TLI, ScalarEvolution &SE) {
+    DSEState State(F, AA, MSSA, DT, PDT, TLI, SE);
     // Collect blocks with throwing instructions not modeled in MemorySSA and
     // alloc-like objects.
     unsigned PO = 0;
@@ -1760,7 +1791,7 @@
     int64_t InstWriteOffset, DepWriteOffset;
     if (auto CC = getLocForWriteEx(UseInst))
       return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset,
-                         InstWriteOffset, BatchAA, &F) == OW_Complete;
+                         InstWriteOffset, BatchAA, &F, SE) == OW_Complete;
     return false;
   }
 
@@ -1864,8 +1895,8 @@
     }
     int64_t InstWriteOffset, DepWriteOffset;
     return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DL, TLI,
-                       DepWriteOffset, InstWriteOffset, BatchAA,
-                       &F) == OW_Complete;
+                       DepWriteOffset, InstWriteOffset, BatchAA, &F,
+                       SE) == OW_Complete;
   }
 
   // Returns true if \p Use may read from \p DefLoc.
@@ -2062,7 +2093,7 @@
       } else {
         int64_t InstWriteOffset, DepWriteOffset;
         auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
-                              DepWriteOffset, InstWriteOffset, BatchAA, &F);
+                              DepWriteOffset, InstWriteOffset, BatchAA, &F, SE);
        // If Current does not write to the same object as KillingDef, check
        // the next candidate.
        if (OR == OW_Unknown) {
@@ -2475,10 +2506,11 @@
 bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
                                   MemorySSA &MSSA, DominatorTree &DT,
                                   PostDominatorTree &PDT,
-                                  const TargetLibraryInfo &TLI) {
+                                  const TargetLibraryInfo &TLI,
+                                  ScalarEvolution &SE) {
   bool MadeChange = false;
 
-  DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI);
+  DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, SE);
   // For each store:
   for (unsigned I = 0; I < State.MemDefs.size(); I++) {
     MemoryDef *KillingDef = State.MemDefs[I];
@@ -2575,7 +2607,7 @@
         int64_t InstWriteOffset, DepWriteOffset;
         OverwriteResult OR =
             isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset,
-                        InstWriteOffset, State.BatchAA, &F);
+                        InstWriteOffset, State.BatchAA, &F, SE);
         if (OR == OW_MaybePartial) {
           auto Iter = State.IOLs.insert(
               std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
@@ -2650,17 +2682,18 @@
   AliasAnalysis &AA = AM.getResult<AAManager>(F);
   const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
 
   bool Changed = false;
   if (EnableMemorySSA) {
     MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
     PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
 
-    Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI);
+    Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI, SE);
   } else {
     MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
 
-    Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI);
+    Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI, &SE);
   }
 
 #ifdef LLVM_ENABLE_STATS
@@ -2699,21 +2732,22 @@
 
     AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
     DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
     const TargetLibraryInfo &TLI =
         getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-
     bool Changed = false;
     if (EnableMemorySSA) {
+
       MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
       PostDominatorTree &PDT =
           getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
 
-      Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI);
+      Changed = eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI, SE);
     } else {
       MemoryDependenceResults &MD =
          getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
 
-      Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI);
+      Changed = eliminateDeadStores(F, &AA, &MD, &DT, &TLI, &SE);
     }
 
 #ifdef LLVM_ENABLE_STATS
@@ -2742,6 +2776,9 @@
       AU.addRequired<MemoryDependenceWrapperPass>();
       AU.addPreserved<MemoryDependenceWrapperPass>();
     }
+
+    AU.addRequired<ScalarEvolutionWrapperPass>();
+    AU.addPreserved<ScalarEvolutionWrapperPass>();
   }
 };
 
@@ -2758,6 +2795,7 @@
 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
                     false)
Index: llvm/test/Transforms/DeadStoreElimination/MSSA/offsetted-overlapping-stores.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/DeadStoreElimination/MSSA/offsetted-overlapping-stores.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -dse -S | FileCheck %s
+
+@BUFFER = external local_unnamed_addr global [0 x i8], align 1
+
+define void @MissedDSEOpportunity2(i64 %0) {
+;
+; The DSE pass can kill the earlier i32 store using the later i64 store
+; because the i64 store fully overwrites it. Concretely:
+;
+; - the two pointers share the same base (in SCEV form, '@BUFFER + %0'),
+; - the earlier store starts 4 bytes past the start of the later store,
+; - the earlier store writes 4 bytes (i32),
+; - the later store writes 8 bytes (i64), and 4 + 4 <= 8.
+;
+; CHECK-LABEL: @MissedDSEOpportunity2(
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP0:%.*]], -8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
+; CHECK-NEXT:    store i64 0, i64* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
+  %2 = add i64 %0, -8
+  %3 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %2
+  %4 = bitcast i8* %3 to i64*
+  %5 = add i64 %0, -4
+  %6 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %5
+  %7 = bitcast i8* %6 to i32*
+  store i32 1, i32* %7
+  store i64 0, i64* %4
+  ret void
+}
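
Reviewer note (not part of the patch): once SE.getMinusSCEV has folded the pointer difference to a constant, the new check in isOverwrite reduces to plain integer arithmetic. Below is a minimal standalone C++ sketch of just that final step, with the values taken from the MissedDSEOpportunity2 test above; the helper name isCompleteOverwrite is hypothetical, not something the patch adds.

```cpp
// Standalone model of the containment check added to isOverwrite().
// Assumption: the two stores were already proven to share a base, i.e.
// SE.getMinusSCEV() folded their difference to the constant `Diff`
// (earlier start minus later start, in bytes).
#include <cassert>
#include <cstdint>
#include <iostream>

// Hypothetical helper mirroring `Diff.getLimitedValue() + EarlierSize <= LaterSize`.
bool isCompleteOverwrite(int64_t Diff, uint64_t EarlierSize,
                         uint64_t LaterSize) {
  // A negative difference means the earlier store starts before the later
  // one, so the later store cannot contain it.
  if (Diff < 0)
    return false;
  // The earlier store is dead iff it lies entirely within the later one.
  return static_cast<uint64_t>(Diff) + EarlierSize <= LaterSize;
}

int main() {
  // MissedDSEOpportunity2: the i32 store is at @BUFFER + %0 - 4 and the
  // i64 store at @BUFFER + %0 - 8, so Diff = (-4) - (-8) = 4 bytes.
  assert(isCompleteOverwrite(/*Diff=*/4, /*EarlierSize=*/4, /*LaterSize=*/8));
  // A 4-byte store at offset 6 would spill past the i64 store: 6 + 4 > 8.
  assert(!isCompleteOverwrite(/*Diff=*/6, /*EarlierSize=*/4, /*LaterSize=*/8));
  std::cout << "containment checks hold\n";
  return 0;
}
```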
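A second note on why the existing base-plus-constant-offset decomposition (GetPointerBaseWithConstantOffset) misses this case: both GEPs index @BUFFER with the variable %0, so neither pointer reduces to "base + constant" and the two bases never compare equal. SCEV subtraction instead cancels the shared %0 term and leaves the constant 4. The toy model below illustrates that cancellation under those assumptions; SymbolicPtr and constantDifference are illustrative names, not LLVM API.

```cpp
// Toy model: a pointer is (base, symbolic index, constant byte offset).
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>

struct SymbolicPtr {
  std::string Base; // e.g. "@BUFFER"
  std::string Sym;  // e.g. "%0"; empty if none
  int64_t Off;      // constant byte offset
};

// Mirrors SE.getMinusSCEV(): the difference folds to a constant only
// when the symbolic parts cancel.
std::optional<int64_t> constantDifference(const SymbolicPtr &A,
                                          const SymbolicPtr &B) {
  if (A.Base != B.Base || A.Sym != B.Sym)
    return std::nullopt; // difference stays symbolic
  return A.Off - B.Off;
}

int main() {
  SymbolicPtr Earlier{"@BUFFER", "%0", -4}; // the i32 store's address
  SymbolicPtr Later{"@BUFFER", "%0", -8};   // the i64 store's address
  // Neither pointer alone is "base + constant" (the %0 term blocks the
  // old decomposition), but their difference is the constant 4.
  if (auto D = constantDifference(Earlier, Later))
    std::cout << "Diff = " << *D << " bytes\n"; // prints: Diff = 4 bytes
  return 0;
}
```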