diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1482,9 +1482,12 @@ SmallVector MemDefs; // Any that should be skipped as they are already deleted SmallPtrSet SkipStores; - // Keep track of all of the objects that are invisible to the caller until the + // Keep track of all of the objects that are invisible to the caller before + // the function returns. + SmallPtrSet InvisibleToCallerBeforeRet; + // Keep track of all of the objects that are invisible to the caller // function returns. - SmallPtrSet InvisibleToCaller; + SmallPtrSet InvisibleToCallerAfterRet; // Keep track of blocks with throwing instructions not modeled in MemorySSA. SmallPtrSet ThrowingBlocks; // Post-order numbers for each basic block. Used to figure out if memory @@ -1518,13 +1521,24 @@ hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I)) State.MemDefs.push_back(MD); - // Track alloca and alloca-like objects. Here we care about objects not - // visible to the caller during function execution. Alloca objects are - // invalid in the caller, for alloca-like objects we ensure that they - // are not captured throughout the function. - if (isa(&I) || - (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true))) - State.InvisibleToCaller.insert(&I); + // Track whether alloca and alloca-like objects are visible in the + // caller before and after the function returns. Alloca objects are + // invalid in the caller, so they are neither visible before or after + // the function returns. + if (isa(&I)) { + State.InvisibleToCallerBeforeRet.insert(&I); + State.InvisibleToCallerAfterRet.insert(&I); + } + + // For alloca-like objects we need to check if they are captured before + // the function returns and if the return might capture the object. + if (isAllocLikeFn(&I, &TLI)) { + bool CapturesBeforeRet = PointerMayBeCaptured(&I, false, true); + if (!CapturesBeforeRet) + State.InvisibleToCallerBeforeRet.insert(&I); + if (!CapturesBeforeRet && !PointerMayBeCaptured(&I, true, false)) + State.InvisibleToCallerAfterRet.insert(&I); + } } } @@ -1532,7 +1546,7 @@ // dead at the end of the function. for (Argument &AI : F.args()) if (AI.hasByValOrInAllocaAttr()) - State.InvisibleToCaller.insert(&AI); + State.InvisibleToCallerBeforeRet.insert(&AI); return State; } @@ -1601,14 +1615,15 @@ } // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no - // read access in between or return None otherwise. The returned value may not - // (completely) overwrite \p DefLoc. Currently we bail out when we encounter - // an aliasing MemoryUse (read). - Optional getDomMemoryDef(MemoryDef *KillingDef, - MemoryAccess *Current, - MemoryLocation DefLoc, - bool DefVisibleToCaller, - int &ScanLimit) const { + // read access between them or on any other path to a function exit block if + // \p DefLoc is not accessible after the function returns. If there is no such + // MemoryDef, return None. The returned value may not (completely) overwrite + // \p DefLoc. Currently we bail out when we encounter an aliasing MemoryUse + // (read). + Optional + getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current, + MemoryLocation DefLoc, bool DefVisibleToCallerBeforeRet, + bool DefVisibleToCallerAfterRet, int &ScanLimit) const { MemoryAccess *DomAccess; bool StepAgain; LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current @@ -1634,12 +1649,13 @@ if (isa(DomAccess)) break; - // Check if we can skip DomDef for DSE. We also require the KillingDef - // execute whenever DomDef executes and use post-dominance to ensure that. - + // Check if we can skip DomDef for DSE. For accesses to objects that are + // accessible after the function returns, KillingDef must execute whenever + // DomDef executes and use post-dominance to ensure that. MemoryDef *DomDef = dyn_cast(DomAccess); - if ((DomDef && canSkipDef(DomDef, DefVisibleToCaller)) || - !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock())) { + if ((DomDef && canSkipDef(DomDef, DefVisibleToCallerBeforeRet)) || + (DefVisibleToCallerAfterRet && + !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock()))) { StepAgain = true; Current = DomDef->getDefiningAccess(); } @@ -1761,7 +1777,7 @@ // First see if we can ignore it by using the fact that SI is an // alloca/alloca like object that is not visible to the caller during // execution of the function. - if (SILocUnd && InvisibleToCaller.count(SILocUnd)) + if (SILocUnd && InvisibleToCallerBeforeRet.count(SILocUnd)) return false; if (SI->getParent() == NI->getParent()) @@ -1779,7 +1795,7 @@ MemoryLocation &NILoc) const { // If NI may throw it acts as a barrier, unless we are to an alloca/alloca // like object that does not escape. - if (NI->mayThrow() && !InvisibleToCaller.count(SILocUnd)) + if (NI->mayThrow() && !InvisibleToCallerBeforeRet.count(SILocUnd)) return true; if (NI->isAtomic()) { @@ -1820,10 +1836,15 @@ const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL); Instruction *DefObj = const_cast(dyn_cast(SILocUnd)); - bool DefVisibleToCaller = !State.InvisibleToCaller.count(SILocUnd); - if (DefObj && ((isAllocLikeFn(DefObj, &TLI) && - !PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT)))) - DefVisibleToCaller = false; + bool DefVisibleToCallerBeforeRet = + !State.InvisibleToCallerBeforeRet.count(SILocUnd); + bool DefVisibleToCallerAfterRet = + !State.InvisibleToCallerAfterRet.count(SILocUnd); + if (DefObj && isAllocLikeFn(DefObj, &TLI)) { + if (DefVisibleToCallerBeforeRet) + DefVisibleToCallerBeforeRet = + PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT); + } MemoryAccess *Current = KillingDef; LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " @@ -1841,7 +1862,8 @@ continue; Optional Next = State.getDomMemoryDef( - KillingDef, Current, SILoc, DefVisibleToCaller, ScanLimit); + KillingDef, Current, SILoc, DefVisibleToCallerBeforeRet, + DefVisibleToCallerAfterRet, ScanLimit); if (!Next) { LLVM_DEBUG(dbgs() << " finished walk\n"); diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll @@ -21,10 +21,9 @@ ; } -define dso_local i32 @_Z1av() !dbg !7 { +define dso_local i32 @_Z1av([5 x i64]* %b) !dbg !7 { entry: %retval = alloca i32, align 4 - %b = alloca [5 x i64], align 16 call void @llvm.dbg.declare(metadata [5 x i64]* %b, metadata !11, metadata !DIExpression()), !dbg !16 %0 = bitcast [5 x i64]* %b to i8*, !dbg !16 call void @llvm.memset.p0i8.i64(i8* align 16 %0, i8 0, i64 40, i1 false), !dbg !16 @@ -37,7 +36,7 @@ store i64 2, i64* %4, align 16, !dbg !16 %5 = getelementptr inbounds [5 x i64], [5 x i64]* %1, i32 0, i32 3, !dbg !16 store i64 2, i64* %5, align 8, !dbg !16 - %call = call i32 @_Z1av(), !dbg !17 + %call = call i32 @_Z1av([5 x i64]* %b), !dbg !17 %tobool = icmp ne i32 %call, 0, !dbg !17 br i1 %tobool, label %if.then, label %if.end, !dbg !19 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-unknown-sizes.ll @@ -13,7 +13,7 @@ ; CHECK: cond.true.i.i.i: ; CHECK-NEXT: ret void ; CHECK: cond.end.i.i.i: -; CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = tail call noalias nonnull i8* @_Znam() #2 +; CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = tail call noalias nonnull i8* @_Znam() ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL_I_I_I_I_I]] to i64* ; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* nonnull align 8 [[CALL_I_I_I_I_I]], i8 0, i64 undef, i1 false) ; CHECK-NEXT: store i64 0, i64* [[TMP0]], align 8 @@ -42,7 +42,7 @@ ; CHECK-NEXT: br i1 [[C:%.*]], label [[CLEANUP_CONT104:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[MUL_I_I_I_I:%.*]] = shl nuw nsw i64 undef, 3 -; CHECK-NEXT: [[CALL_I_I_I_I_I_I131:%.*]] = call noalias nonnull i8* @_Znwm() #2 +; CHECK-NEXT: [[CALL_I_I_I_I_I_I131:%.*]] = call noalias nonnull i8* @_Znwm() ; CHECK-NEXT: [[DOTCAST_I_I:%.*]] = bitcast i8* [[CALL_I_I_I_I_I_I131]] to i64* ; CHECK-NEXT: store i64 0, i64* [[DOTCAST_I_I]], align 8 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 8 [[CALL_I_I_I_I_I_I131]], i8 0, i64 [[MUL_I_I_I_I]], i1 false) diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-malloc-free.ll @@ -1,8 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; XFAIL: * -; TODO: Handling of free not implemented yet. - ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" @@ -308,7 +305,7 @@ define noalias %struct.SystemCallMapElementStruct* @SystemCallMapElement_new(i8* nocapture readonly %label, i32 %initialSize) { ; CHECK-LABEL: @SystemCallMapElement_new( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(24) i8* @malloc(i64 24) #7 +; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(24) i8* @malloc(i64 24) #6 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to %struct.SystemCallMapElementStruct* ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8* [[CALL]], null ; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[IF_THEN:%.*]] @@ -380,7 +377,7 @@ define noalias %struct.BitfieldStruct* @Bitfield_new(i32 %bitsNeeded) { ; CHECK-LABEL: @Bitfield_new( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(16) i8* @malloc(i64 16) #7 +; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(16) i8* @malloc(i64 16) #6 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to %struct.BitfieldStruct* ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8* [[CALL]], null ; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[IF_END:%.*]] @@ -388,7 +385,7 @@ ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[BITSNEEDED:%.*]], 7 ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[ADD]], 8 ; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -; CHECK-NEXT: [[CALL1:%.*]] = tail call i8* @calloc(i64 [[CONV]], i64 1) #8 +; CHECK-NEXT: [[CALL1:%.*]] = tail call i8* @calloc(i64 [[CONV]], i64 1) #7 ; CHECK-NEXT: [[BITFIELD:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 8 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BITFIELD]] to i8** ; CHECK-NEXT: store i8* [[CALL1]], i8** [[TMP1]], align 8 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll @@ -126,7 +126,8 @@ ; CHECK-NEXT: [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32* ; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 ; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br label [[BB3:%.*]] @@ -162,7 +163,8 @@ ; CHECK-NEXT: [[P:%.*]] = bitcast [32 x i32]* [[P_ALLOCA]] to i32* ; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 ; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll @@ -136,10 +136,11 @@ ; Tests where the pointer/object is *NOT* accessible after the function returns. +; The store in the entry block can be eliminated, because the it is overwritten +; on all paths to the exit. define void @alloca_1(i1 %c1) { ; CHECK-LABEL: @alloca_1( ; CHECK-NEXT: [[P:%.*]] = alloca i32 -; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: store i32 0, i32* [[P]] @@ -167,10 +168,11 @@ ret void } +; The store in the entry block can be eliminated, because the it is overwritten +; on all paths to the exit. define void @alloca_2(i1 %c.1, i1 %c.2) { ; CHECK-LABEL: @alloca_2( ; CHECK-NEXT: [[P:%.*]] = alloca i32 -; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: store i32 0, i32* [[P]] @@ -211,6 +213,8 @@ ret void } +; The store in the entry block cannot be eliminated. There's a path from the +; first store to the read in bb5, where the location is not overwritten. define void @alloca_3(i1 %c1) { ; CHECK-LABEL: @alloca_3( ; CHECK-NEXT: [[P:%.*]] = alloca i32 @@ -240,10 +244,12 @@ ret void } +; The store in the entry block can be eliminated, because the it is overwritten +; before the use in bb1 and not read on other paths to the function exit. The +; object cannot be accessed by the caller. define void @alloca_4(i1 %c1) { ; CHECK-LABEL: @alloca_4( ; CHECK-NEXT: [[P:%.*]] = alloca i32 -; CHECK-NEXT: store i32 1, i32* [[P]] ; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: store i32 0, i32* [[P]] diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll @@ -175,7 +175,6 @@ define void @test11() { ; CHECK-LABEL: @test11( ; CHECK-NEXT: [[P:%.*]] = alloca i32 -; CHECK-NEXT: store i32 0, i32* [[P]] ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: store i32 0, i32* [[P]]