Index: llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp =================================================================== --- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1634,18 +1634,6 @@ /// basic block. DenseMap IOLs; - struct CheckCache { - SmallPtrSet KnownNoReads; - SmallPtrSet KnownReads; - - bool isKnownNoRead(MemoryAccess *A) const { - return KnownNoReads.find(A) != KnownNoReads.end(); - } - bool isKnownRead(MemoryAccess *A) const { - return KnownReads.find(A) != KnownReads.end(); - } - }; - DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI) : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI), @@ -1940,9 +1928,8 @@ Optional getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess, const MemoryLocation &DefLoc, const Value *DefUO, - CheckCache &Cache, unsigned &ScanLimit, - unsigned &WalkerStepLimit, bool IsMemTerm, - unsigned &PartialLimit) { + unsigned &ScanLimit, unsigned &WalkerStepLimit, + bool IsMemTerm, unsigned &PartialLimit) { if (ScanLimit == 0 || WalkerStepLimit == 0) { LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n"); return None; @@ -1954,6 +1941,7 @@ LLVM_DEBUG(dbgs() << " trying to get dominating access\n"); // Find the next clobbering Mod access for DefLoc, starting at StartAccess. + Optional CurrentLoc; do { StepAgain = false; LLVM_DEBUG({ @@ -2017,12 +2005,8 @@ // clobber, bail out, as the path is not profitable. We skip this check // for intrinsic calls, because the code knows how to handle memcpy // intrinsics. - if (!isa(CurrentI) && - (Cache.KnownReads.contains(Current) || - isReadClobber(DefLoc, CurrentI))) { - Cache.KnownReads.insert(Current); + if (!isa(CurrentI) && isReadClobber(DefLoc, CurrentI)) return None; - } // Quick check if there are direct uses that are read-clobbers. if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) { @@ -2031,7 +2015,6 @@ isReadClobber(DefLoc, UseOrDef->getMemoryInst()); return false; })) { - Cache.KnownReads.insert(Current); LLVM_DEBUG(dbgs() << " ... found a read clobber\n"); return None; } @@ -2045,13 +2028,24 @@ } // If Current does not have an analyzable write location, skip it - auto CurrentLoc = getLocForWriteEx(CurrentI); + CurrentLoc = getLocForWriteEx(CurrentI); if (!CurrentLoc) { StepAgain = true; Current = CurrentDef->getDefiningAccess(); continue; } + // AliasAnalysis does not account for loops. Limit elimination to + // candidates for which we can guarantee they always store to the same + // memory location and not multiple locations in a loop. + if (Current->getBlock() != KillingDef->getBlock() && + !IsGuaranteedLoopInvariant(const_cast(CurrentLoc->Ptr))) { + StepAgain = true; + Current = CurrentDef->getDefiningAccess(); + WalkerStepLimit -= 1; + continue; + } + if (IsMemTerm) { // If the killing def is a memory terminator (e.g. lifetime.end), check // the next candidate if the current Current does not write the same @@ -2062,17 +2056,6 @@ } continue; } else { - // AliasAnalysis does not account for loops. Limit elimination to - // candidates for which we can guarantee they always store to the same - // memory location and not multiple locations in a loop. - if (Current->getBlock() != KillingDef->getBlock() && - !IsGuaranteedLoopInvariant(const_cast(CurrentLoc->Ptr))) { - StepAgain = true; - Current = CurrentDef->getDefiningAccess(); - WalkerStepLimit -= 1; - continue; - } - int64_t InstWriteOffset, DepWriteOffset; auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI, DepWriteOffset, InstWriteOffset, BatchAA, &F); @@ -2133,18 +2116,6 @@ } --ScanLimit; NumDomMemDefChecks++; - - // Check if we already visited this access. - if (Cache.isKnownNoRead(UseAccess)) { - LLVM_DEBUG(dbgs() << " ... skip, discovered that " << *UseAccess - << " is safe earlier.\n"); - continue; - } - if (Cache.isKnownRead(UseAccess)) { - LLVM_DEBUG(dbgs() << " ... bail out, discovered that " << *UseAccess - << " has a read-clobber earlier.\n"); - return None; - } KnownNoReads.insert(UseAccess); if (isa(UseAccess)) { @@ -2172,7 +2143,7 @@ // A memory terminator kills all preceeding MemoryDefs and all succeeding // MemoryAccesses. We do not have to check it's users. - if (isMemTerminator(DefLoc, KillingI, UseInst)) { + if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) { LLVM_DEBUG( dbgs() << " ... skipping, memterminator invalidates following accesses\n"); @@ -2187,19 +2158,13 @@ if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) { LLVM_DEBUG(dbgs() << " ... found throwing instruction\n"); - Cache.KnownReads.insert(UseAccess); - Cache.KnownReads.insert(StartAccess); - Cache.KnownReads.insert(EarlierAccess); return None; } // Uses which may read the original MemoryDef mean we cannot eliminate the // original MD. Stop walk. - if (isReadClobber(DefLoc, UseInst)) { + if (isReadClobber(*CurrentLoc, UseInst)) { LLVM_DEBUG(dbgs() << " ... found read clobber\n"); - Cache.KnownReads.insert(UseAccess); - Cache.KnownReads.insert(StartAccess); - Cache.KnownReads.insert(EarlierAccess); return None; } @@ -2223,7 +2188,7 @@ // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, // stores [0,1] if (MemoryDef *UseDef = dyn_cast(UseAccess)) { - if (isCompleteOverwrite(DefLoc, KillingI, UseInst)) { + if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) { if (!isInvisibleToCallerAfterRet(DefUO) && UseAccess != EarlierAccess) { BasicBlock *MaybeKillingBlock = UseInst->getParent(); @@ -2311,7 +2276,6 @@ // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is // potentially dead. - Cache.KnownNoReads.insert(KnownNoReads.begin(), KnownNoReads.end()); return {EarlierAccess}; } @@ -2550,7 +2514,6 @@ bool Shortend = false; bool IsMemTerm = State.isMemTerminatorInst(SI); - DSEState::CheckCache Cache; // Check if MemoryAccesses in the worklist are killed by KillingDef. for (unsigned I = 0; I < ToCheck.size(); I++) { Current = ToCheck[I]; @@ -2558,8 +2521,8 @@ continue; Optional Next = State.getDomMemoryDef( - KillingDef, Current, SILoc, SILocUnd, Cache, ScanLimit, - WalkerStepLimit, IsMemTerm, PartialLimit); + KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit, + IsMemTerm, PartialLimit); if (!Next) { LLVM_DEBUG(dbgs() << " finished walk\n"); Index: llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll +++ llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll @@ -41,13 +41,13 @@ } ; Post-dominating store. +; TODO: The memset can be shortened. define void @accessible_after_return_2(i32* noalias %P, i1 %c) { ; CHECK-LABEL: @accessible_after_return_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 ; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false) ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 Index: llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll +++ llvm/test/Transforms/DeadStoreElimination/MSSA/out-of-bounds-stores.ll @@ -36,6 +36,8 @@ ; CHECK-NEXT: [[D:%.*]] = alloca [1 x i32], align 4 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1 x i32], [1 x i32]* [[D]], i64 0, i64 0 +; CHECK-NEXT: store i32 10, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.inc: ; CHECK-NEXT: br i1 [[C:%.*]], label [[FOR_BODY_1:%.*]], label [[FOR_END:%.*]] Index: llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll +++ llvm/test/Transforms/DeadStoreElimination/MSSA/overlap.ll @@ -67,13 +67,11 @@ ret void } -; TODO: The store to %a0 is dead, because only %a1 is read later. +; The store to %a0 is dead, because only %a1 is read later. define void @test3(i1 %c) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: [[A:%.*]] = alloca [2 x i8], align 1 -; CHECK-NEXT: [[A0:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 0 ; CHECK-NEXT: [[A1:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 1 -; CHECK-NEXT: store i8 1, i8* [[A0]], align 1 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: ; CHECK-NEXT: store [2 x i8] zeroinitializer, [2 x i8]* [[A]], align 1 @@ -102,10 +100,8 @@ define void @test4(i1 %c) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: [[A:%.*]] = alloca [2 x i8], align 1 -; CHECK-NEXT: [[A0:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 0 ; CHECK-NEXT: [[A1:%.*]] = getelementptr [2 x i8], [2 x i8]* [[A]], i32 0, i32 1 ; CHECK-NEXT: store i8 1, i8* [[A1]], align 1 -; CHECK-NEXT: store i8 1, i8* [[A0]], align 1 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: ; CHECK-NEXT: store [2 x i8] zeroinitializer, [2 x i8]* [[A]], align 1 Index: llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll +++ llvm/test/Transforms/DeadStoreElimination/MSSA/scoped-noalias.ll @@ -4,13 +4,13 @@ ; Assume that %p1 != %p2 if and only if %c is true. In that case the noalias ; metadata is correct, but the first store cannot be eliminated, as it may be ; read-clobbered by the load. -; TODO The store is incorrectly eliminated. define void @test(i1 %c, i8* %p1, i8* %p2) { ; CHECK-LABEL: @test( +; CHECK-NEXT: store i8 0, i8* [[P1:%.*]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[P2:%.*]], align 1, !alias.scope !0 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: -; CHECK-NEXT: store i8 1, i8* [[P1:%.*]], align 1, !noalias !0 +; CHECK-NEXT: store i8 1, i8* [[P1]], align 1, !noalias !0 ; CHECK-NEXT: ret void ; CHECK: else: ; CHECK-NEXT: store i8 2, i8* [[P1]], align 1