diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Scalar/DeadStoreElimination.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -42,7 +43,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -1494,7 +1494,8 @@ // * Multiple MemoryDefs, which indicates a split in the control flow. WalkResult getNextMemoryDef(MemoryAccess *Current, MemoryDef *Original, int ScanLimit, AliasAnalysis &AA, - const TargetLibraryInfo &TLI) { + const TargetLibraryInfo &TLI, + DenseMap &PostOrderMap) { if (--ScanLimit <= 0) return {WalkType::Bad, nullptr}; @@ -1504,13 +1505,23 @@ // Scan the uses to see if we have a single interesting MemoryAccess. for (Use &U : Current->uses()) { MemoryAccess *UseAccess = cast(U.getUser()); - // Bail out on MemoryPhis for now. - if (isa(UseAccess)) - return {WalkType::Bad, nullptr}; + + // For MemoryPhis, bail out if we already found a NextAccess or if the + // post-order number of the MemoryPhi's block is greater or equal than the + // number of the current access. This ensures we do not visit accesses that + // may be executed before the current access. + if (isa(UseAccess)) { + if (NextAccess || PostOrderMap[UseAccess->getBlock()] >= + PostOrderMap[Current->getBlock()]) + return {WalkType::Bad, nullptr}; + NextAccess = UseAccess; + continue; + } LLVM_DEBUG(dbgs() << " Checking use " << *cast(UseAccess)->getMemoryInst() << "\n"); + // We can remove the dead stores, irrespective of the fence and its ordering // (release/acquire/seq_cst). Fences only constraints the ordering of // already visible stores, it does not make a store visible to other @@ -1542,10 +1553,14 @@ // def or it is an unrelated MemoryAccess and we continue the walk starting at // NextAccess. if (NextAccess) { - assert(isa(NextAccess) && "Only MemoryDefs supported for now"); - if (isWriteClobber(OriginalLoc, cast(NextAccess), AA, TLI)) - return {WalkType::Next, cast(NextAccess)}; - return getNextMemoryDef(NextAccess, Original, ScanLimit, AA, TLI); + if (auto *NextDef = dyn_cast(NextAccess)) { + if (isWriteClobber(OriginalLoc, cast(NextDef), AA, TLI)) { + return {WalkType::Next, NextDef}; + } + } + assert(isa(NextAccess) || isa(NextAccess)); + return getNextMemoryDef(NextAccess, Original, ScanLimit, AA, TLI, + PostOrderMap); } // No aliasing definition found, bail out. @@ -1665,22 +1680,30 @@ // Keep track of blocks with throwing instructions. SmallPtrSet ThrowingBlocks; - // Collect blocks with throwing instructions and alloc-like objects. - for (Instruction &I : instructions(F)) { - if (isThrowLikeInstruction(&I)) - ThrowingBlocks.insert(I.getParent()); - - auto *MD = dyn_cast_or_null(MSSA.getMemoryAccess(&I)); - if (MD && hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) - Stores.push_back(MD); - - // Track alloca and alloca-like objects - if (isa(&I)) - NonEscapingStackObjects.insert(&I); - // Okay, so these are dead heap objects, but if the pointer never escapes - // then it's leaked by this function anyways. - else if (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true)) - NonEscapingStackObjects.insert(&I); + // Post-order numbers for basic blocks in the function. + DenseMap PostOrderNumbers; + unsigned PO = 0; + // Collect blocks with throwing instructions, alloc-like objects and + // post-order numbers for basic blocks. + for (BasicBlock *BB : post_order(&F)) { + PostOrderNumbers[BB] = PO++; + for (Instruction &I : *BB) { + if (isThrowLikeInstruction(&I)) + ThrowingBlocks.insert(I.getParent()); + + auto *MD = dyn_cast_or_null(MSSA.getMemoryAccess(&I)); + if (MD && hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) + Stores.push_back(MD); + + // Track alloca and alloca-like objects + if (isa(&I)) + NonEscapingStackObjects.insert(&I); + // Okay, so these are dead heap objects, but if the pointer never escapes + // then it's leaked by this function anyways. + else if (isAllocLikeFn(&I, &TLI) && + !PointerMayBeCaptured(&I, false, true)) + NonEscapingStackObjects.insert(&I); + } } // Treat byval or inalloca arguments the same as Allocas, stores to them are // dead at the end of the function. @@ -1704,8 +1727,8 @@ // Walk MemorySSA forward to find a MemoryDef that kills SI. WalkResult Next; - while ( - (Next = getNextMemoryDef(SIMD, SIDef, MemorySSAScanLimit, AA, TLI))) { + while ((Next = getNextMemoryDef(SIMD, SIDef, MemorySSAScanLimit, AA, TLI, + PostOrderNumbers))) { MemoryAccess *NextAccess = Next.Next; LLVM_DEBUG(dbgs() << " Checking " << *NextAccess << "\n"); MemoryDef *ND = dyn_cast(NextAccess); diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-debugcounter.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-debugcounter.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-debugcounter.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-debugcounter.ll @@ -35,7 +35,6 @@ ; SKIP-2-NEXT: ret void ; ; COUNT-1-LABEL: @test2( -; COUNT-1-NEXT: store i32 1, i32* [[P:%.*]] ; COUNT-1-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; COUNT-1: bb1: ; COUNT-1-NEXT: br label [[BB3:%.*]] @@ -43,7 +42,8 @@ ; COUNT-1-NEXT: br label [[BB3]] ; COUNT-1: bb3: ; COUNT-1-NEXT: store i32 0, i32* [[Q:%.*]] -; COUNT-1-NEXT: store i32 0, i32* [[P]] +; COUNT-1-NEXT: store i32 0, i32* [[Q]] +; COUNT-1-NEXT: store i32 0, i32* [[P:%.*]] ; COUNT-1-NEXT: ret void ; ; COUNT-0-LABEL: @test2( @@ -60,6 +60,7 @@ ; COUNT-0-NEXT: ret void ; ; SKIP-1-COUNT-1-LABEL: @test2( +; SKIP-1-COUNT-1-NEXT: store i32 1, i32* [[P:%.*]] ; SKIP-1-COUNT-1-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; SKIP-1-COUNT-1: bb1: ; SKIP-1-COUNT-1-NEXT: br label [[BB3:%.*]] @@ -67,8 +68,7 @@ ; SKIP-1-COUNT-1-NEXT: br label [[BB3]] ; SKIP-1-COUNT-1: bb3: ; SKIP-1-COUNT-1-NEXT: store i32 0, i32* [[Q:%.*]] -; SKIP-1-COUNT-1-NEXT: store i32 0, i32* [[Q]] -; SKIP-1-COUNT-1-NEXT: store i32 0, i32* [[P:%.*]] +; SKIP-1-COUNT-1-NEXT: store i32 0, i32* [[P]] ; SKIP-1-COUNT-1-NEXT: ret void ; store i32 1, i32* %P diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll @@ -27,10 +27,9 @@ define void @test14(i32* noalias %P) { ; CHECK-LABEL: @test14( ; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br label [[FOR:%.*]] ; CHECK: for: -; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] ; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] ; CHECK: end: ; CHECK-NEXT: ret void @@ -48,6 +47,31 @@ define void @test18(i32* noalias %P) { ; CHECK-LABEL: @test18( ; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR:%.*]] +; CHECK: for: +; CHECK-NEXT: store i32 1, i32* [[P:%.*]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: store i32 2, i32* [[P]] +; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + store i32 0, i32* %P + br label %for +for: + store i32 1, i32* %P + %x = load i32, i32* %P + store i32 2, i32* %P + br i1 false, label %for, label %end +end: + ret void +} + + +define void @test18_partial(i32* noalias %P) { +; CHECK-LABEL: @test18_partial( +; CHECK-NEXT: entry: ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* ; CHECK-NEXT: store i32 0, i32* [[P]] ; CHECK-NEXT: br label [[FOR:%.*]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll @@ -33,13 +33,11 @@ ; CHECK-LABEL: @test5( ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: -; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] ; CHECK-NEXT: ret void ; br i1 true, label %bb1, label %bb2 @@ -58,13 +56,12 @@ ; CHECK-LABEL: @test8( ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: store i32 1, i32* [[Q:%.*]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] ; CHECK-NEXT: ret void ; br i1 true, label %bb1, label %bb2