diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1519,7 +1519,7 @@
 
       auto *MD = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(&I));
       if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
-          hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I))
+          State.getLocForWriteEx(&I) && isRemovable(&I))
         State.MemDefs.push_back(MD);
 
       // Track alloca and alloca-like objects. Here we care about objects not
@@ -1598,7 +1598,68 @@
            UseLoc->Size.getValue() >= DefLoc.Size.getValue());
   }
 
+  /// Returns true if no memory access reachable from \p Def through MemorySSA
+  /// uses may read the location written by \p Def.
+  ///
+  /// Conservatively returns false when the written location cannot be
+  /// determined, when a read clobber is found, or when the worklist reaches
+  /// the exploration limit.
+  bool isEnd(MemoryDef *Def) {
+    LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
+                      << *Def->getMemoryInst()
+                      << ") is at the end the function \n");
+
+    auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst());
+    if (!MaybeLoc) {
+      LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
+      return false;
+    }
+
+    // Give up once the worklist holds this many accesses.
+    constexpr unsigned ExplorationLimit = 100;
+
+    SmallVector<MemoryAccess *, 4> WorkList;
+    SmallPtrSet<MemoryAccess *, 8> Visited;
+    // Queue the users of Acc; Visited makes sure each access is expanded once.
+    auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
+      if (!Visited.insert(Acc).second)
+        return;
+      for (Use &U : Acc->uses())
+        WorkList.push_back(cast<MemoryAccess>(U.getUser()));
+    };
+    PushMemUses(Def);
+    // WorkList grows while it is being scanned, hence the index-based loop.
+    for (unsigned I = 0; I < WorkList.size(); I++) {
+      if (WorkList.size() >= ExplorationLimit) {
+        LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
+        return false;
+      }
+
+      MemoryAccess *UseAccess = WorkList[I];
+      // A MemoryPhi carries no instruction to inspect; look through it.
+      // NOTE(review): locations are compared as SSA values, so walking
+      // through a MemoryPhi in a loop may miss a read that happens in another
+      // iteration. Confirm this is safe or bail out here instead.
+      if (isa<MemoryPhi>(UseAccess)) {
+        PushMemUses(UseAccess);
+        continue;
+      }
+
+      Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
+      if (isReadClobber(*MaybeLoc, UseInst)) {
+        LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
+        return false;
+      }
+
+      // A MemoryDef that does not read the location may still be followed by
+      // accesses that do, so keep walking through its users.
+      if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
+        PushMemUses(UseDef);
+    }
+    return true;
+  }
+
   /// Returns true if \p Use may read from \p DefLoc.
   bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const {
     if (!UseInst->mayReadFromMemory())
       return false;
@@ -1822,6 +1883,54 @@
 
     return false;
   }
+
+  /// Deletes memory writes that are not read again before the function ends.
+  ///
+  /// Visits MemDefs from last to first, skipping entries already recorded in
+  /// SkipStores. A def is deleted when isEnd() holds for it and every
+  /// underlying object of the written pointer is in InvisibleToCaller.
+  ///
+  /// \returns true if at least one instruction was deleted.
+  bool eliminateEnds() {
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    bool MadeChange = false;
+    LLVM_DEBUG(
+        dbgs()
+        << "Trying to eliminate MemoryDefs at the end of the function\n");
+    for (int I = MemDefs.size() - 1; I >= 0; I--) {
+      MemoryDef *Def = MemDefs[I];
+      if (SkipStores.count(Def))
+        continue;
+      if (!isEnd(Def))
+        continue;
+
+      Instruction *DefI = Def->getMemoryInst();
+      // Collect the underlying objects of the written pointer. isEnd() only
+      // returned true after getLocForWriteEx() produced a location for this
+      // instruction; the dereference below relies on that.
+      SmallVector<const Value *, 4> Pointers;
+      GetUnderlyingObjects(getLocForWriteEx(DefI)->Ptr, Pointers, DL);
+
+      LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
+                           "of the function\n");
+      // The write may only be removed when every object it can target is
+      // recorded in InvisibleToCaller.
+      bool CanKill = true;
+      for (const Value *Pointer : Pointers) {
+        if (!InvisibleToCaller.count(Pointer)) {
+          CanKill = false;
+          break;
+        }
+      }
+      if (!CanKill)
+        continue;
+
+      deleteDeadInstruction(DefI);
+      ++NumFastStores;
+      MadeChange = true;
+    }
+    return MadeChange;
+  }
 };
 
 bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
@@ -1950,6 +2059,8 @@
   for (auto &KV : State.IOLs)
     MadeChange |= removePartiallyOverlappedStores(&AA, DL, KV.second);
 
+  MadeChange |= State.eliminateEnds();
+
   MemorySSAUpdater MSSAU(&MSSA);
   for (auto &BB : F) {
     if (!DT.isReachableFromEntry(&BB))
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
@@ -1,4 +1,3 @@
-; XFAIL: *
 ; RUN: opt -dse -enable-dse-memoryssa -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git 
a/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll @@ -1,4 +1,3 @@ -; XFAIL: * ; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s declare i8* @strcpy(i8* %dest, i8* %src) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll @@ -72,5 +72,5 @@ -; Check that the store is removed and that the memcpy is still there +; Check that both the store and the memcpy are removed ; CHECK-LABEL: foo ; CHECK-NOT: store i8 -; CHECK: call void @llvm.memcpy +; CHECK-NOT: call void @llvm.memcpy ; CHECK: ret void diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll @@ -211,11 +211,9 @@ ; Stores to stack objects can be eliminated if they are not captured inside the function. 
define void @test_alloca_nocapture_1() { ; CHECK-LABEL: @test_alloca_nocapture_1( -; CHECK-NEXT: [[M:%.*]] = alloca i8 ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 @@ -237,7 +235,6 @@ ; CHECK-NEXT: call void @capture(i8* [[M]]) ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 @@ -260,7 +257,6 @@ ; CHECK: exit: ; CHECK-NEXT: [[F_PTR:%.*]] = getelementptr [[S1:%.*]], %S1* [[E:%.*]], i32 0, i32 0 ; CHECK-NEXT: store i8* [[M]], i8** [[F_PTR]] -; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll @@ -29,7 +29,6 @@ ; CHECK-NEXT: [[C1:%.*]] = cleanuppad within none [] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: store i32 40, i32* [[SV]] ; CHECK-NEXT: ret void ; block1: diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll @@ -246,7 +246,6 @@ ; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: [[M:%.*]] = call noalias i8* @malloc(i64 10) -; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[R:%.*]] = phi i8* [ null, [[BB1:%.*]] ], [ [[M]], [[BB2]] ] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll 
b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll @@ -174,10 +174,8 @@ define void @test11() { ; CHECK-LABEL: @test11( -; CHECK-NEXT: [[P:%.*]] = alloca i32 ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 0, i32* [[P]] ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll @@ -93,35 +93,6 @@ declare noalias i8* @malloc(i32) declare noalias i8* @calloc(i32, i32) -define void @test14(i32* %Q) { -; CHECK-LABEL: @test14( -; CHECK-NEXT: ret void -; - %P = alloca i32 - %DEAD = load i32, i32* %Q - store i32 %DEAD, i32* %P - ret void - -} - -define void @test20() { -; CHECK-LABEL: @test20( -; CHECK-NEXT: ret void -; - %m = call i8* @malloc(i32 24) - store i8 0, i8* %m - ret void -} - -define void @test21() { -; CHECK-LABEL: @test21( -; CHECK-NEXT: ret void -; - %m = call i8* @calloc(i32 9, i32 7) - store i8 0, i8* %m - ret void -} - define void @test22(i1 %i, i32 %k, i32 %m) nounwind { ; CHECK-LABEL: @test22( ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -178,6 +178,35 @@ declare noalias i8* @malloc(i32) declare noalias i8* @calloc(i32, i32) +define void @test14(i32* %Q) { +; CHECK-LABEL: @test14( +; CHECK-NEXT: ret void +; + %P = alloca i32 + %DEAD = load i32, i32* %Q + store i32 %DEAD, i32* %P + ret void + +} + +define void @test20() { 
+; CHECK-LABEL: @test20( +; CHECK-NEXT: ret void +; + %m = call i8* @malloc(i32 24) + store i8 0, i8* %m + ret void +} + +define void @test21() { +; CHECK-LABEL: @test21( +; CHECK-NEXT: ret void +; + %m = call i8* @calloc(i32 9, i32 7) + store i8 0, i8* %m + ret void +} + ; The store here is not dead because the byval call reads it. declare void @test19f({i32}* byval align 4 %P)