Index: llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp =================================================================== --- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -463,14 +463,24 @@ const Value *P1 = Earlier.Ptr->stripPointerCasts(); const Value *P2 = Later.Ptr->stripPointerCasts(); + // Query the alias information + AliasResult AAR = AA.alias(Later, Earlier); + // If the start pointers are the same, we just have to compare sizes to see if // the later store was larger than the earlier store. - if (P1 == P2 || AA.isMustAlias(P1, P2)) { + if (P1 == P2 || (AAR == AliasResult::MustAlias)) { // Make sure that the Later size is >= the Earlier size. if (LaterSize >= EarlierSize) return OW_Complete; } + // If we hit a partial alias we may have a full overwrite + if (AAR == AliasResult::PartialAlias) { + int64_t Off = AA.getClobberOffset(Later, Earlier).getValueOr(0); + if ((Off > 0) && (((uint64_t)Off + EarlierSize) <= LaterSize)) + return OW_Complete; + } + // Check to see if the later store is to the entire object (either a global, // an alloca, or a byval/inalloca argument). If so, then it clearly // overwrites any other store to the same object. @@ -1301,6 +1311,7 @@ MemoryDependenceResults *MD, DominatorTree *DT, const TargetLibraryInfo *TLI) { const DataLayout &DL = BB.getModule()->getDataLayout(); + BatchAAResults BatchAA(*AA, /*CacheOffsets =*/true); bool MadeChange = false; MapVector ThrowableInst; @@ -1412,9 +1423,9 @@ if (isRemovable(DepWrite) && !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = isOverwrite(Inst, DepWrite, Loc, DepLoc, DL, *TLI, - DepWriteOffset, InstWriteOffset, *AA, - BB.getParent()); + OverwriteResult OR = + isOverwrite(Inst, DepWrite, Loc, DepLoc, DL, *TLI, DepWriteOffset, + InstWriteOffset, BatchAA, BB.getParent()); if (OR == OW_MaybePartial) OR = isPartialOverwrite(Loc, DepLoc, DepWriteOffset, InstWriteOffset, DepWrite, IOL); @@ -1635,8 +1646,8 @@ DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI) - : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI), - DL(F.getParent()->getDataLayout()) {} + : F(F), AA(AA), BatchAA(AA, /*CacheOffsets =*/true), MSSA(MSSA), DT(DT), + PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()) {} static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, Index: llvm/test/Transforms/DeadStoreElimination/MSSA/offsetted-overlapping-stores.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/DeadStoreElimination/MSSA/offsetted-overlapping-stores.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -dse -S | FileCheck %s + +@BUFFER = external local_unnamed_addr global [0 x i8], align 1 + +define void @MissedDSEOpportunity2(i64 %0) { +; +; The DSE pass will try to kill the store of size i32 using the store of +; size i64 because they fully overlap, in fact: +; +; - they use the same base pointer (in SCEV style '@BUFFER + %0') +; - the offset between the two stores is 32 bits +; - the size of the earlier store is 32 bits +; - the size of the later store is 64 bits +; +; CHECK-LABEL: @MissedDSEOpportunity2( +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP0:%.*]], -8 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64* +; CHECK-NEXT: store i64 0, i64* [[TMP4]], align 4 +; CHECK-NEXT: ret void +; + %2 = add i64 %0, -8 + %3 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %2 + %4 = bitcast i8* %3 to i64* + %5 = add i64 %0, -4 + %6 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %5 + %7 = bitcast i8* %6 to i32* + store i32 1, i32* %7 + store i64 0, i64* %4 + ret void +}