Index: llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -34,6 +34,7 @@ class MemorySSA; class MemorySSAUpdater; class MemSetInst; +class PostDominatorTree; class StoreInst; class TargetLibraryInfo; class Value; @@ -43,6 +44,7 @@ AAResults *AA = nullptr; AssumptionCache *AC = nullptr; DominatorTree *DT = nullptr; + PostDominatorTree *PDT = nullptr; MemorySSA *MSSA = nullptr; MemorySSAUpdater *MSSAU = nullptr; @@ -53,7 +55,8 @@ // Glue for the old PM. bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA, - AssumptionCache *AC, DominatorTree *DT, MemorySSA *MSSA); + AssumptionCache *AC, DominatorTree *DT, PostDominatorTree *PDT, + MemorySSA *MSSA); private: // Helper functions Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -19,12 +19,14 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" @@ -1468,17 +1470,16 @@ // 2-1. Check that src and dest are static allocas, which are not affected by // stacksave/stackrestore. - if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() || - SrcAlloca->getParent() != Load->getParent() || - SrcAlloca->getParent() != Store->getParent()) + if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca()) return false; // 2-2. Check that src and dest are never captured, unescaped allocas. Also - // collect lifetime markers first/last users in order to shrink wrap the - // lifetimes, and instructions with noalias metadata to remove them. + // find the nearest common dominator and postdominator for all users in + // order to shrink wrap the lifetimes, and instructions with noalias metadata + // to remove them. SmallVector LifetimeMarkers; - Instruction *FirstUser = nullptr, *LastUser = nullptr; + Instruction *Dom = nullptr, *PDom = nullptr; SmallSet NoAliasInstrs; // Recursively track the user and check whether modified alias exist. @@ -1516,12 +1517,13 @@ continue; case UseCaptureKind::NO_CAPTURE: { auto *UI = cast(U.getUser()); - if (DestAlloca->getParent() != UI->getParent()) - return false; - if (!FirstUser || UI->comesBefore(FirstUser)) - FirstUser = UI; - if (!LastUser || LastUser->comesBefore(UI)) - LastUser = UI; + if (!Dom) + Dom = PDom = UI; + else { + Dom = DT->findNearestCommonDominator(Dom, UI); + if (PDom != nullptr) + PDom = PDT->findNearestCommonDominator(PDom, UI); + } if (UI->hasMetadata(LLVMContext::MD_noalias)) NoAliasInstrs.insert(UI); if (UI->isLifetimeStartOrEnd()) { @@ -1557,7 +1559,7 @@ // FIXME: For multi-BB cases, we need to see reachability from it to // store. // Bailout if Dest may have any ModRef before Store. - if (UI->comesBefore(Store) && isModOrRefSet(Res)) + if (isPotentiallyReachable(UI, Store) && isModOrRefSet(Res)) return false; DestModRef |= BAA.getModRefInfo(UI, DestLoc); @@ -1573,9 +1575,9 @@ MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size)); auto SrcModRefCallback = [&](Instruction *UI) -> bool { - // Any ModRef before Load doesn't matter, also Load and Store can be - // ignored. - if (UI->comesBefore(Load) || UI == Load || UI == Store) + // Any ModRef post-dominated by Load doesn't matter, also Load and Store + // themselves can be ignored. + if (PDT->dominates(Load, UI) || UI == Load || UI == Store) return true; ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc); if ((isModSet(DestModRef) && isRefSet(Res)) || @@ -1607,13 +1609,17 @@ ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size); // Create a new lifetime start marker before the first user of src or alloca // users. - Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator()); + Builder.SetInsertPoint(Dom->getParent(), Dom->getIterator()); Builder.CreateLifetimeStart(SrcAlloca, AllocaSize); // Create a new lifetime end marker after the last user of src or alloca - // users. - Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator()); - Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize); + // users. If there's no such postdominator, just don't bother; we could + // create one at each exit block, but that'd be essentially semantically + // meaningless. + if (PDom) { + Builder.SetInsertPoint(PDom->getParent(), ++PDom->getIterator()); + Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize); + } // Remove all other lifetime markers. for (Instruction *I : LifetimeMarkers) @@ -1999,9 +2005,10 @@ auto *AA = &AM.getResult(F); auto *AC = &AM.getResult(F); auto *DT = &AM.getResult(F); + auto *PDT = &AM.getResult(F); auto *MSSA = &AM.getResult(F); - bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA()); + bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA()); if (!MadeChange) return PreservedAnalyses::all(); @@ -2013,12 +2020,14 @@ bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_, AliasAnalysis *AA_, AssumptionCache *AC_, - DominatorTree *DT_, MemorySSA *MSSA_) { + DominatorTree *DT_, PostDominatorTree *PDT_, + MemorySSA *MSSA_) { bool MadeChange = false; TLI = TLI_; AA = AA_; AC = AC_; DT = DT_; + PDT = PDT_; MSSA = MSSA_; MemorySSAUpdater MSSAU_(MSSA_); MSSAU = &MSSAU_; Index: llvm/test/Transforms/MemCpyOpt/stack-move.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/stack-move.ll +++ llvm/test/Transforms/MemCpyOpt/stack-move.ll @@ -328,19 +328,15 @@ ; CHECK-LABEL: define void @multi_bb_memcpy ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]]) ; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[SRC]]) ; CHECK-NEXT: br label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 4, i1 false) ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca i32, align 4 @@ -411,22 +407,18 @@ ; CHECK-LABEL: define void @multi_bb_simple_br ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -500,9 +492,7 @@ ; CHECK-LABEL: define void @multi_bb_unreachable_modref ; CHECK-SAME: (i1 [[B0:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[EXIT:%.*]] @@ -510,9 +500,6 @@ ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: ret void ; CHECK: bb0: -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -539,21 +526,17 @@ ; CHECK-LABEL: define void @multi_bb_non_dominated ; CHECK-SAME: (i1 [[B0:%.*]], i1 [[B1:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4