diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1535,7 +1535,7 @@
 
       auto *MD = dyn_cast_or_null<MemoryDef>(MA);
       if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
-          hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I))
+          State.getLocForWriteEx(&I) && isRemovable(&I))
         State.MemDefs.push_back(MD);
 
       // Track whether alloca and alloca-like objects are visible in the
@@ -1617,6 +1617,52 @@
   }
 
+  /// Returns true if no access reachable through the MemorySSA users of \p Def
+  /// may read the location written by \p Def. Conservatively returns false if
+  /// that location is unknown or the exploration limit is hit.
+  bool isEnd(MemoryDef *Def) {
+    LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
+                      << *Def->getMemoryInst()
+                      << ") is at the end of the function \n");
+    auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst());
+    if (!MaybeLoc) {
+      LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
+      return false;
+    }
+
+    SmallVector<MemoryAccess *, 4> WorkList;
+    SmallPtrSet<MemoryAccess *, 8> Visited;
+    // Queue the users of Acc, unless they were already queued.
+    auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
+      if (!Visited.insert(Acc).second)
+        return;
+      for (Use &U : Acc->uses())
+        WorkList.push_back(cast<MemoryAccess>(U.getUser()));
+    };
+    PushMemUses(Def);
+    for (unsigned I = 0; I < WorkList.size(); I++) {
+      if (WorkList.size() >= 100) {
+        LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
+        return false;
+      }
+      MemoryAccess *UseAccess = WorkList[I];
+      // A MemoryPhi has no memory instruction; look through to its users.
+      if (isa<MemoryPhi>(UseAccess)) {
+        PushMemUses(UseAccess);
+        continue;
+      }
+      Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
+      if (isReadClobber(*MaybeLoc, UseInst)) {
+        LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
+        return false;
+      }
+      // A later def does not end the search: keep following its users.
+      if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
+        PushMemUses(UseDef);
+    }
+    return true;
+  }
+
   /// Returns true if \p Use may read from \p DefLoc.
bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const {
     if (!UseInst->mayReadFromMemory())
       return false;
@@ -1916,6 +1962,42 @@
 
     return false;
   }
+
+  /// Deletes each tracked MemoryDef whose write is never read before the
+  /// function returns (see isEnd) and whose underlying objects are all in
+  /// InvisibleToCallerAfterRet. Returns true if anything was deleted.
+  bool eliminateEnds() {
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    bool MadeChange = false;
+    LLVM_DEBUG(
+        dbgs()
+        << "Trying to eliminate MemoryDefs at the end of the function\n");
+    for (size_t I = MemDefs.size(); I-- > 0;) {
+      MemoryDef *Def = MemDefs[I];
+      if (SkipStores.count(Def) || !isEnd(Def))
+        continue;
+      // isEnd() succeeded, so getLocForWriteEx() is known to yield a location.
+      Instruction *DefI = Def->getMemoryInst();
+      SmallVector<const Value *, 4> Pointers;
+      GetUnderlyingObjects(getLocForWriteEx(DefI)->Ptr, Pointers, DL);
+      LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
+                           "of the function\n");
+      // Every underlying object must be in InvisibleToCallerAfterRet.
+      bool CanKill = true;
+      for (const Value *Pointer : Pointers) {
+        if (!InvisibleToCallerAfterRet.count(Pointer)) {
+          CanKill = false;
+          break;
+        }
+      }
+      if (!CanKill)
+        continue;
+      deleteDeadInstruction(DefI);
+      ++NumFastStores;
+      MadeChange = true;
+    }
+    return MadeChange;
+  }
 };
 
 /// \returns true if \p KillingDef stores the result of \p Load to the source of
@@ -2079,6 +2161,7 @@
     for (auto &KV : State.IOLs)
       MadeChange |= removePartiallyOverlappedStores(&AA, DL, KV.second);
 
+  MadeChange |= State.eliminateEnds();
   return MadeChange;
 }
 } // end anonymous namespace
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
@@ -1,4 +1,3 @@
-; XFAIL: *
 ; RUN: opt -dse -enable-dse-memoryssa -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git
a/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/libcalls.ll @@ -1,4 +1,3 @@ -; XFAIL: * ; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s declare i8* @strcpy(i8* %dest, i8* %src) nounwind diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll @@ -72,5 +72,5 @@ -; Check that the store is removed and that the memcpy is still there +; Check that both the store and the memcpy are removed ; CHECK-LABEL: foo ; CHECK-NOT: store i8 -; CHECK: call void @llvm.memcpy +; CHECK-NOT: call void @llvm.memcpy ; CHECK: ret void diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll @@ -211,11 +211,9 @@ ; Stores to stack objects can be eliminated if they are not captured inside the function. 
define void @test_alloca_nocapture_1() { ; CHECK-LABEL: @test_alloca_nocapture_1( -; CHECK-NEXT: [[M:%.*]] = alloca i8 ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 @@ -237,7 +235,6 @@ ; CHECK-NEXT: call void @capture(i8* [[M]]) ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 @@ -260,7 +257,6 @@ ; CHECK: exit: ; CHECK-NEXT: [[F_PTR:%.*]] = getelementptr [[S1:%.*]], %S1* [[E:%.*]], i32 0, i32 0 ; CHECK-NEXT: store i8* [[M]], i8** [[F_PTR]] -; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: ret void ; %m = alloca i8 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll @@ -29,7 +29,6 @@ ; CHECK-NEXT: [[C1:%.*]] = cleanuppad within none [] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: store i32 40, i32* [[SV]] ; CHECK-NEXT: ret void ; block1: diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll @@ -246,7 +246,6 @@ ; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: [[M:%.*]] = call noalias i8* @malloc(i64 10) -; CHECK-NEXT: store i8 1, i8* [[M]] ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[R:%.*]] = phi i8* [ null, [[BB1:%.*]] ], [ [[M]], [[BB2]] ] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll 
b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll @@ -123,18 +123,10 @@ define void @alloca_1(i1 %c) { ; CHECK-LABEL: @alloca_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[P_ALLOCA:%.*]] = alloca [32 x i32] -; CHECK-NEXT: [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32* -; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: ret void @@ -160,20 +152,10 @@ define void @alloca_2(i1 %c) { ; CHECK-LABEL: @alloca_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[P_ALLOCA:%.*]] = alloca [32 x i32] -; CHECK-NEXT: [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32* -; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; 
CHECK-NEXT: store i32 1, i32* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: ret void @@ -204,20 +186,12 @@ define void @alloca_3(i1 %c) { ; CHECK-LABEL: @alloca_3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[P_ALLOCA:%.*]] = alloca [32 x i32] -; CHECK-NEXT: [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32* -; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 -; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX1]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll @@ -589,14 +589,11 @@ ; CHECK-LABEL: @alloca_5( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BLAM_4:%.*]], align 8 -; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_BLAM_4]], %struct.blam.4* [[TMP]], i64 0, i32 0, i32 1 -; CHECK-NEXT: [[TMP37:%.*]] = bitcast i64** [[TMP36]] to i8* ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_BLAM_4]], %struct.blam.4* [[TMP]], i64 0, i32 0, i32 3 ; CHECK-NEXT: [[TMP39:%.*]] = bitcast i64* [[TMP38]] to i64* ; CHECK-NEXT: store i64 0, i64* [[TMP39]], align 4 ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB46:%.*]], label [[BB47:%.*]] ; CHECK: bb46: -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 
8 dereferenceable(20) [[TMP37]], i8 0, i64 26, i1 false) ; CHECK-NEXT: ret void ; CHECK: bb47: ; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_BLAM_4]], %struct.blam.4* [[TMP]], i64 0, i32 0, i32 2 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll @@ -174,10 +174,8 @@ define void @test11() { ; CHECK-LABEL: @test11( -; CHECK-NEXT: [[P:%.*]] = alloca i32, align 4 ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 0, i32* [[P]], align 4 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll @@ -74,35 +74,6 @@ declare noalias i8* @malloc(i32) declare noalias i8* @calloc(i32, i32) -define void @test14(i32* %Q) { -; CHECK-LABEL: @test14( -; CHECK-NEXT: ret void -; - %P = alloca i32 - %DEAD = load i32, i32* %Q - store i32 %DEAD, i32* %P - ret void - -} - -define void @test20() { -; CHECK-LABEL: @test20( -; CHECK-NEXT: ret void -; - %m = call i8* @malloc(i32 24) - store i8 0, i8* %m - ret void -} - -define void @test21() { -; CHECK-LABEL: @test21( -; CHECK-NEXT: ret void -; - %m = call i8* @calloc(i32 9, i32 7) - store i8 0, i8* %m - ret void -} - define void @test22(i1 %i, i32 %k, i32 %m) nounwind { ; CHECK-LABEL: @test22( ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ 
b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -178,6 +178,35 @@ declare noalias i8* @malloc(i32) declare noalias i8* @calloc(i32, i32) +define void @test14(i32* %Q) { +; CHECK-LABEL: @test14( +; CHECK-NEXT: ret void +; + %P = alloca i32 + %DEAD = load i32, i32* %Q + store i32 %DEAD, i32* %P + ret void + +} + +define void @test20() { +; CHECK-LABEL: @test20( +; CHECK-NEXT: ret void +; + %m = call i8* @malloc(i32 24) + store i8 0, i8* %m + ret void +} + +define void @test21() { +; CHECK-LABEL: @test21( +; CHECK-NEXT: ret void +; + %m = call i8* @calloc(i32 9, i32 7) + store i8 0, i8* %m + ret void +} + ; The store here is not dead because the byval call reads it. declare void @test19f({i32}* byval align 4 %P)