diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -83,6 +83,9 @@ STATISTIC(NumCompletePartials, "Number of stores dead by later partials"); STATISTIC(NumModifiedStores, "Number of stores modified"); STATISTIC(NumNoopStores, "Number of noop stores deleted"); +STATISTIC(NumCFGChecks, "Number of stores modified"); +STATISTIC(NumCFGTries, "Number of stores modified"); +STATISTIC(NumCFGSuccess, "Number of stores modified"); DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa", "Controls which MemoryDefs are eliminated."); @@ -111,6 +114,11 @@ cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)")); +static cl::opt MemorySSAPathCheckLimit( + "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, + cl::desc("The maximum number of blocks to check when trying to prove that " + "all paths to an exit go through a killing block (default = 50)")); + //===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// @@ -1591,15 +1599,15 @@ if (CB->onlyAccessesInaccessibleMemory()) return false; - ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc); - // If necessary, perform additional analysis. - if (isModSet(MR) && isa(UseInst)) - MR = AA.callCapturesBefore(UseInst, DefLoc, &DT); + int64_t InstWriteOffset, DepWriteOffset; + auto CC = getLocForWriteEx(UseInst); + InstOverlapIntervalsTy IOL; + + const DataLayout &DL = F.getParent()->getDataLayout(); - Optional UseLoc = getLocForWriteEx(UseInst); - return isModSet(MR) && isMustSet(MR) && - (UseLoc->Size.hasValue() && DefLoc.Size.hasValue() && - UseLoc->Size.getValue() >= DefLoc.Size.getValue()); + return CC && + isOverwrite(DefLoc, *CC, DL, TLI, DepWriteOffset, InstWriteOffset, + UseInst, IOL, AA, &F) == OW_Complete; } /// Returns true if \p Use may read from \p DefLoc. @@ -1653,19 +1661,20 @@ if (isa(DomAccess)) break; - // Check if we can skip DomDef for DSE. For accesses to objects that are - // accessible after the function returns, KillingDef must execute whenever - // DomDef executes and use post-dominance to ensure that. + // Check if we can skip DomDef for DSE. MemoryDef *DomDef = dyn_cast(DomAccess); - if ((DomDef && canSkipDef(DomDef, DefVisibleToCallerBeforeRet)) || - (DefVisibleToCallerAfterRet && - !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock()))) { + if (DomDef && canSkipDef(DomDef, DefVisibleToCallerBeforeRet)) { StepAgain = true; Current = DomDef->getDefiningAccess(); } } while (StepAgain); + // Accesses to objects accessible after the function returns can only be + // eliminated if the access is killed along all paths to the exit. Collect + // the blocks with killing (=completely overwriting MemoryDefs) and check if + // they cover all paths from DomAccess to any function exit. + SmallPtrSet KillingBlocks = {KillingDef->getBlock()}; LLVM_DEBUG({ dbgs() << " Checking for reads of " << *DomAccess; if (isa(DomAccess)) @@ -1733,11 +1742,92 @@ // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias, // stores [0,1] if (MemoryDef *UseDef = dyn_cast(UseAccess)) { - if (!isCompleteOverwrite(DefLoc, UseInst)) + if (isCompleteOverwrite(DefLoc, UseInst)) { + if (DefVisibleToCallerAfterRet && UseAccess != DomAccess) { + BasicBlock *MaybeKillingBlock = UseInst->getParent(); + if (PostOrderNumbers.find(MaybeKillingBlock)->second < + PostOrderNumbers.find(DomAccess->getBlock())->second) { + + LLVM_DEBUG(dbgs() << " ... found killing block " + << MaybeKillingBlock->getName() << "\n"); + KillingBlocks.insert(MaybeKillingBlock); + } + } + } else PushMemUses(UseDef); } } + // For accesses to locations visible after the function returns, make sure + // that the location is killed (=overwritten) along all paths from DomAccess + // to the exit. + if (DefVisibleToCallerAfterRet) { + assert(!KillingBlocks.empty() && + "Expected at least a single killing block"); + // Find the common post-dominator of all killing blocks. + BasicBlock *CommonPred = *KillingBlocks.begin(); + for (auto I = std::next(KillingBlocks.begin()), E = KillingBlocks.end(); + I != E; I++) { + if (!CommonPred) + break; + CommonPred = PDT.findNearestCommonDominator(CommonPred, *I); + } + + // If CommonPred is in the set of killing blocks, just check if it + // post-dominates DomAccess. + if (KillingBlocks.count(CommonPred)) { + if (PDT.dominates(CommonPred, DomAccess->getBlock())) + return {DomAccess}; + return None; + } + + // If the common post-dominator does not post-dominate DomAccess, there + // is a path from DomAccess to an exit not going through a killing block. + if (PDT.dominates(CommonPred, DomAccess->getBlock())) { + SetVector WorkList; + + // DomAccess's post-order number provides an upper bound of the blocks + // on a path starting at DomAccess. + unsigned UpperBound = + PostOrderNumbers.find(DomAccess->getBlock())->second; + + // If CommonPred is null, there are multiple exits from the function. + // They all have to be added to the worklist. + if (CommonPred) + WorkList.insert(CommonPred); + else + for (BasicBlock *R : PDT.getRoots()) { + if (!DT.isReachableFromEntry(R)) + continue; + WorkList.insert(R); + } + + NumCFGTries++; + // Check if all paths starting from an exit node go through one of the + // killing blocks before reaching DomAccess. + for (unsigned I = 0; I < WorkList.size(); I++) { + NumCFGChecks++; + BasicBlock *Current = WorkList[I]; + if (KillingBlocks.count(Current)) + continue; + if (Current == DomAccess->getBlock()) + return None; + unsigned CPO = PostOrderNumbers.find(Current)->second; + // Current block is not on a path starting at DomAccess. + if (CPO > UpperBound) + continue; + for (BasicBlock *Pred : predecessors(Current)) + WorkList.insert(Pred); + + if (WorkList.size() >= MemorySSAPathCheckLimit) + return None; + } + NumCFGSuccess++; + return {DomAccess}; + } + return None; + } + // No aliasing MemoryUses of DomAccess found, DomAccess is potentially dead. return {DomAccess}; } diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memintrinsics.ll @@ -46,7 +46,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 ; CHECK-NEXT: [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 28, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false) ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1 diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll @@ -9,10 +9,9 @@ define void @accessible_after_return_1(i32* noalias %P, i1 %c1) { ; CHECK-LABEL: @accessible_after_return_1( -; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 0, i32* [[P]], align 4 +; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[BB5:%.*]] ; CHECK: bb2: ; CHECK-NEXT: store i32 3, i32* [[P]], align 4 @@ -38,10 +37,9 @@ define void @accessible_after_return_2(i32* noalias %P, i1 %c.1, i1 %c.2) { ; CHECK-LABEL: @accessible_after_return_2( -; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 0, i32* [[P]], align 4 +; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[BB5:%.*]] ; CHECK: bb2: ; CHECK-NEXT: br i1 [[C_2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]] @@ -77,6 +75,8 @@ ret void } +; Cannot remove store in entry block because it is not overwritten on path +; entry->bb2->bb5. define void @accessible_after_return_3(i32* noalias %P, i1 %c1) { ; CHECK-LABEL: @accessible_after_return_3( ; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4 @@ -105,6 +105,8 @@ ret void } +; Cannot remove store in entry block because it is not overwritten on path +; entry->bb2->bb5. define void @accessible_after_return_4(i32* noalias %P, i1 %c1) { ; CHECK-LABEL: @accessible_after_return_4( ; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4 @@ -179,12 +181,11 @@ define void @accessible_after_return6(i32* %P, i1 %c.1, i1 %c.2) { ; CHECK-LABEL: @accessible_after_return6( ; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br i1 [[C_2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]] ; CHECK: bb2: -; CHECK-NEXT: store i32 1, i32* [[P]], align 4 +; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4 ; CHECK-NEXT: ret void ; CHECK: bb3: ; CHECK-NEXT: store i32 2, i32* [[P]], align 4 @@ -220,10 +221,9 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br i1 [[C_2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]] ; CHECK: bb3: -; CHECK-NEXT: store i32 2, i32* [[P]], align 4 +; CHECK-NEXT: store i32 2, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[BB5:%.*]] ; CHECK: bb4: ; CHECK-NEXT: store i32 1, i32* [[P]], align 4 @@ -257,7 +257,7 @@ ; Cannot remove store in entry block, because it is overwritten along each path to -; the exit (entry->bb1->bb5->bb5). +; the exit (entry->bb1->bb4->bb5). define void @accessible_after_return8(i32* %P, i1 %c.1, i1 %c.2) { ; CHECK-LABEL: @accessible_after_return8( ; CHECK-NEXT: entry: @@ -353,6 +353,9 @@ ret void } +; Cannot remove store in entry block because it is not overwritten on path +; entry->bb2->bb4. Also make sure we deal with dead exit blocks without +; crashing. define void @accessible_after_return10_dead_block(i32* %P, i1 %c.1, i1 %c.2) { ; CHECK-LABEL: @accessible_after_return10_dead_block( ; CHECK-NEXT: entry: diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll @@ -199,10 +199,9 @@ define void @test12(i32* %P) { ; CHECK-LABEL: @test12( -; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 1, i32* [[P]], align 4 +; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: store i32 1, i32* [[P]], align 4 @@ -225,10 +224,9 @@ define void @test13(i32* %P) { ; CHECK-LABEL: @test13( -; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 1, i32* [[P]], align 4 +; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb2: ; CHECK-NEXT: store i32 1, i32* [[P]], align 4