diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -43,7 +44,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -1495,7 +1495,8 @@ WalkResult getNextMemoryDef(MemoryAccess *Current, MemoryLocation DefLoc, Instruction *DefObj, bool DefVisibleToCaller, int ScanLimit, AliasAnalysis &AA, - const TargetLibraryInfo &TLI, DominatorTree &DT) { + const TargetLibraryInfo &TLI, DominatorTree &DT, + DenseMap &PostOrderMap) { if (--ScanLimit < 0) return {WalkType::Bad, nullptr}; @@ -1503,9 +1504,18 @@ // Scan the uses to see if we have a single interesting MemoryAccess. for (Use &U : Current->uses()) { MemoryAccess *UseAccess = cast(U.getUser()); - // Bail out on MemoryPhis for now. - if (isa(UseAccess)) - return {WalkType::Bad, nullptr}; + + // For MemoryPhis, bail out if we already found a NextAccess or if the + // post-order number of the MemoryPhi's block is greater or equal than the + // number of the current access. This ensures we do not visit accesses that + // may be executed before the current access. + if (isa(UseAccess)) { + if (NextAccess || PostOrderMap[UseAccess->getBlock()] >= + PostOrderMap[Current->getBlock()]) + return {WalkType::Bad, nullptr}; + NextAccess = UseAccess; + continue; + } Instruction *UseInst = cast(UseAccess)->getMemoryInst(); LLVM_DEBUG(dbgs() << " Checking use " << *UseInst << "\n"); @@ -1554,16 +1564,16 @@ // def or it is an unrelated MemoryAccess and we continue the walk starting at // NextAccess. if (NextAccess) { - assert(isa(NextAccess) && "Only MemoryDefs supported for now"); - - if (!isNoopIntrinsic(cast(NextAccess))) { - if (isWriteClobber(DefLoc, - cast(NextAccess)->getMemoryInst(), AA, - TLI, DT)) - return {WalkType::Next, cast(NextAccess)}; + if (auto *NextDef = dyn_cast(NextAccess)) { + if (!isNoopIntrinsic(NextDef)) { + if (isWriteClobber(DefLoc, NextDef->getMemoryInst(), AA, TLI, DT)) { + return {WalkType::Next, NextDef}; + } + } } + assert(isa(NextAccess) || isa(NextAccess)); return getNextMemoryDef(NextAccess, DefLoc, DefObj, DefVisibleToCaller, - ScanLimit, AA, TLI, DT); + ScanLimit, AA, TLI, DT, PostOrderMap); } // No aliasing definition found, bail out. @@ -1676,23 +1686,29 @@ // Keep track of blocks with throwing instructions not modeled in MemorySSA. SmallPtrSet ThrowingBlocks; - // Collect blocks with throwing instructions not modeled in MemroySSA and - // alloc-like objects. - for (Instruction &I : instructions(F)) { - if (I.mayThrow() && !MSSA.getMemoryAccess(&I)) - ThrowingBlocks.insert(I.getParent()); - - auto *MD = dyn_cast_or_null(MSSA.getMemoryAccess(&I)); - if (MD && hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) - Stores.push_back(MD); - - // Track alloca and alloca-like objects. Here we care about objects not - // visible to the caller during function execution. Alloca objects are - // invalid in the caller, for alloca-like objects we ensure that they are - // not captured throughout the function. - if (isa(&I) || - (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true))) - InvisibleToCaller.insert(&I); + // Post-order numbers for basic blocks in the function. + DenseMap PostOrderNumbers; + unsigned PO = 0; + // Collect blocks with throwing instructions, alloc-like objects and + // post-order numbers for basic blocks. + for (BasicBlock *BB : post_order(&F)) { + PostOrderNumbers[BB] = PO++; + for (Instruction &I : *BB) { + if (I.mayThrow() && !MSSA.getMemoryAccess(&I)) + ThrowingBlocks.insert(I.getParent()); + + auto *MD = dyn_cast_or_null(MSSA.getMemoryAccess(&I)); + if (MD && hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) + Stores.push_back(MD); + + // Track alloca and alloca-like objects. Here we care about objects not + // visible to the caller during function execution. Alloca objects are + // invalid in the caller, for alloca-like objects we ensure that they are + // not captured throughout the function. + if (isa(&I) || + (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true))) + InvisibleToCaller.insert(&I); + } } // Treat byval or inalloca arguments the same as Allocas, stores to them are // dead at the end of the function. @@ -1719,7 +1735,8 @@ // Walk MemorySSA forward to find a MemoryDef that kills SI. WalkResult Next; while ((Next = getNextMemoryDef(SIMD, SILoc, DefObj, LocVisibleToCaller, - MemorySSAScanLimit, AA, TLI, DT))) { + MemorySSAScanLimit, AA, TLI, DT, + PostOrderNumbers))) { MemoryAccess *NextAccess = Next.Next; LLVM_DEBUG(dbgs() << " Checking " << *NextAccess << "\n"); MemoryDef *ND = dyn_cast(NextAccess); diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-debugcounter.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-debugcounter.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-debugcounter.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-debugcounter.ll @@ -35,7 +35,6 @@ ; SKIP-2-NEXT: ret void ; ; COUNT-1-LABEL: @test2( -; COUNT-1-NEXT: store i32 1, i32* [[P:%.*]] ; COUNT-1-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; COUNT-1: bb1: ; COUNT-1-NEXT: br label [[BB3:%.*]] @@ -43,7 +42,8 @@ ; COUNT-1-NEXT: br label [[BB3]] ; COUNT-1: bb3: ; COUNT-1-NEXT: store i32 0, i32* [[Q:%.*]] -; COUNT-1-NEXT: store i32 0, i32* [[P]] +; COUNT-1-NEXT: store i32 0, i32* [[Q]] +; COUNT-1-NEXT: store i32 0, i32* [[P:%.*]] ; COUNT-1-NEXT: ret void ; ; COUNT-0-LABEL: @test2( @@ -60,6 +60,7 @@ ; COUNT-0-NEXT: ret void ; ; SKIP-1-COUNT-1-LABEL: @test2( +; SKIP-1-COUNT-1-NEXT: store i32 1, i32* [[P:%.*]] ; SKIP-1-COUNT-1-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; SKIP-1-COUNT-1: bb1: ; SKIP-1-COUNT-1-NEXT: br label [[BB3:%.*]] @@ -67,8 +68,7 @@ ; SKIP-1-COUNT-1-NEXT: br label [[BB3]] ; SKIP-1-COUNT-1: bb3: ; SKIP-1-COUNT-1-NEXT: store i32 0, i32* [[Q:%.*]] -; SKIP-1-COUNT-1-NEXT: store i32 0, i32* [[Q]] -; SKIP-1-COUNT-1-NEXT: store i32 0, i32* [[P:%.*]] +; SKIP-1-COUNT-1-NEXT: store i32 0, i32* [[P]] ; SKIP-1-COUNT-1-NEXT: ret void ; store i32 1, i32* %P diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll @@ -18,7 +18,6 @@ ; CHECK-NEXT: invoke void @f() ; CHECK-NEXT: to label [[BLOCK3:%.*]] unwind label [[CATCH_DISPATCH:%.*]] ; CHECK: block3: -; CHECK-NEXT: store i32 30, i32* [[SV]] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: catch.dispatch: ; CHECK-NEXT: [[CS1:%.*]] = catchswitch within none [label %catch] unwind label [[CLEANUP:%.*]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll @@ -27,10 +27,9 @@ define void @test14(i32* noalias %P) { ; CHECK-LABEL: @test14( ; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br label [[FOR:%.*]] ; CHECK: for: -; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] ; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] ; CHECK: end: ; CHECK-NEXT: ret void @@ -48,6 +47,31 @@ define void @test18(i32* noalias %P) { ; CHECK-LABEL: @test18( ; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR:%.*]] +; CHECK: for: +; CHECK-NEXT: store i32 1, i32* [[P:%.*]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: store i32 2, i32* [[P]] +; CHECK-NEXT: br i1 false, label [[FOR]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + store i32 0, i32* %P + br label %for +for: + store i32 1, i32* %P + %x = load i32, i32* %P + store i32 2, i32* %P + br i1 false, label %for, label %end +end: + ret void +} + + +define void @test18_partial(i32* noalias %P) { +; CHECK-LABEL: @test18_partial( +; CHECK-NEXT: entry: ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8* ; CHECK-NEXT: store i32 0, i32* [[P]] ; CHECK-NEXT: br label [[FOR:%.*]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll @@ -33,13 +33,11 @@ ; CHECK-LABEL: @test5( ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: -; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] ; CHECK-NEXT: ret void ; br i1 true, label %bb1, label %bb2 @@ -58,13 +56,12 @@ ; CHECK-LABEL: @test8( ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 1, i32* [[P:%.*]] ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: store i32 1, i32* [[Q:%.*]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: store i32 0, i32* [[P]] +; CHECK-NEXT: store i32 0, i32* [[P:%.*]] ; CHECK-NEXT: ret void ; br i1 true, label %bb1, label %bb2 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-throwing.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; XFAIL: * ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"