Index: lib/Transforms/Scalar/DeadStoreElimination.cpp =================================================================== --- lib/Transforms/Scalar/DeadStoreElimination.cpp +++ lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1070,26 +1070,34 @@ if (!hasMemoryWrite(Inst, *TLI)) continue; - // eliminateNoopStore will update in iterator, if necessary. - if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, &InstrOrdering)) { - MadeChange = true; - continue; - } + // Attempt to eliminate a noop store last. The store may not be a noop at + // this point, but could be rendered one by the removal of dead intervening + // stores below. + auto EliminateNoop = [&]() { + if (Inst /* may be nulled if merged with partial store */ && + eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, &InstrOrdering)) + MadeChange = true; + }; // If we find something that writes memory, get its memory dependence. MemDepResult InstDep = MD->getDependency(Inst); // Ignore any store where we can't find a local dependence. // FIXME: cross-block DSE would be fun. :) - if (!InstDep.isDef() && !InstDep.isClobber()) + if (!InstDep.isDef() && !InstDep.isClobber()) { + // Noops can be removed across blocks, even with no local memdef + EliminateNoop(); continue; + } // Figure out what location is being stored to. MemoryLocation Loc = getLocForWrite(Inst, *AA); // If we didn't get a useful location, fail. - if (!Loc.Ptr) + if (!Loc.Ptr) { + EliminateNoop(); continue; + } // Loop until we find a store we can eliminate or a load that // invalidates the analysis. Without an upper bound on the number of @@ -1230,6 +1238,7 @@ // Delete the old stores and now-dead instructions that feed them. deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, &InstrOrdering); + Inst = nullptr; deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, &InstrOrdering); MadeChange = true; @@ -1258,6 +1267,7 @@ DepWrite->getIterator(), &BB, /*QueryInst=*/ nullptr, &Limit); } + EliminateNoop(); } if (EnablePartialOverwriteTracking) Index: test/Transforms/DeadStoreElimination/late-noop.ll =================================================================== --- test/Transforms/DeadStoreElimination/late-noop.ll +++ test/Transforms/DeadStoreElimination/late-noop.ll @@ -0,0 +1,48 @@ +; Test to make sure noop stores exposed by removal of intervening stores can be removed, +; even when the original load is in a different block +; RUN: opt < %s -basicaa -dse -S | FileCheck %s + +define i32 @foo2(i32* %i, i1 %cond) { +; Both stores removed, first dead, second as late noop +B1: +; CHECK-LABEL: B1 +; CHECK-NEXT: %val.i = load i32, i32* %i, align 4 +; CHECK-NEXT: br i1 %cond + %val.i = load i32, i32* %i, align 4 + %newval.i = add i32 %val.i, 1 + store i32 %newval.i, i32* %i, align 4 + store i32 %val.i, i32* %i, align 4 + br i1 %cond, label %B2, label %B3 + +; Both stores removed, first dead, second as late noop +B2: +; CHECK: B2: +; CHECK-NEXT: br label %B4 + %val.i2 = load i32, i32* %i, align 4 + %newval.i2 = add i32 %val.i2, 1 + store i32 %newval.i2, i32* %i, align 4 + store i32 %val.i, i32* %i, align 4 + br label %B4 + +; Store not dead, no dep in the same block +B3: +; CHECK: B3 +; CHECK-NEXT: %val.i3 = load i32, i32* %i, align 4 +; CHECK-NEXT: %newval.i3 = add i32 %val.i3, 1 +; CHECK-NEXT: store i32 %newval.i3, i32* %i, align 4 +; CHECK-NEXT: br label %B4 + %val.i3 = load i32, i32* %i, align 4 + %newval.i3 = add i32 %val.i3, 1 + store i32 %newval.i3, i32* %i, align 4 + br label %B4 + +; Store not removed as noop due to presence of store in B3 +B4: +; CHECK: B4 +; CHECK-NEXT: store i32 %val.i, i32* %i, align 4 +; CHECK-NEXT: %val.i4 = load i32, i32* %i, align 4 +; CHECK-NEXT: ret i32 %val.i4 + store i32 %val.i, i32* %i, align 4 + %val.i4 = load i32, i32* %i, align 4 + ret i32 %val.i4 +} Index: test/Transforms/DeadStoreElimination/simple.ll =================================================================== --- test/Transforms/DeadStoreElimination/simple.ll +++ test/Transforms/DeadStoreElimination/simple.ll @@ -521,3 +521,19 @@ store i32 0, i32* %p ret void } + +; Basic late noop removal +; CHECK-LABEL: @test36( +; CHECK-NEXT: %val.i2 = load i32, i32* %i, align 4 +; CHECK-NEXT: ret +define i32 @test36(i32* %i) { + %val.i = load i32, i32* %i, align 4 + %newval.i1 = add i32 %val.i, 1 + store i32 %newval.i1, i32* %i, align 4 + %newval.i2 = add i32 %newval.i1, 1 + store i32 %newval.i2, i32* %i, align 4 + store i32 %val.i, i32* %i, align 4 + %val.i2 = load i32, i32* %i, align 4 + ret i32 %val.i2 +} +