diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -34,6 +34,7 @@ class MemorySSA; class MemorySSAUpdater; class MemSetInst; +class PostDominatorTree; class StoreInst; class TargetLibraryInfo; class Value; @@ -43,6 +44,7 @@ AAResults *AA = nullptr; AssumptionCache *AC = nullptr; DominatorTree *DT = nullptr; + PostDominatorTree *PDT = nullptr; MemorySSA *MSSA = nullptr; MemorySSAUpdater *MSSAU = nullptr; @@ -53,7 +55,8 @@ // Glue for the old PM. bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA, - AssumptionCache *AC, DominatorTree *DT, MemorySSA *MSSA); + AssumptionCache *AC, DominatorTree *DT, PostDominatorTree *PDT, + MemorySSA *MSSA); private: // Helper functions diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -19,12 +19,14 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" @@ -1415,6 +1417,74 @@ return true; } +using InsertionPt = PointerUnion; +/// Find the nearest Instruction or BasicBlock that dominates both I1 and +/// I2. +static InsertionPt findNearestCommonDominator(InsertionPt I1, InsertionPt I2, + DominatorTree *DT) { + auto GetParent = [](InsertionPt I) { + if (auto *BB = dyn_cast(I)) + return BB; + return cast(I)->getParent(); + }; + BasicBlock *BB1 = GetParent(I1); + BasicBlock *BB2 = GetParent(I2); + if (BB1 == BB2) { + // BasicBlock InsertionPt means the terminator. + if (isa(I1)) + return I2; + if (isa(I2)) + return I1; + return cast(I1)->comesBefore(cast(I2)) ? I1 + : I2; + } + + // These checks are necessary, because findNearestCommonDominator for NodeT + // doesn't handle these. + if (!DT->isReachableFromEntry(BB2)) + return I1; + if (!DT->isReachableFromEntry(BB1)) + return I2; + + BasicBlock *DomBB = DT->findNearestCommonDominator(BB1, BB2); + if (BB2 == DomBB) + return I2; + if (BB1 == DomBB) + return I1; + return DomBB; +} + +/// Find the nearest Instruction or BasicBlock that post-dominates both I1 and +/// I2. +static InsertionPt findNearestCommonPostDominator(InsertionPt I1, + InsertionPt I2, + PostDominatorTree *PDT) { + auto GetParent = [](InsertionPt I) { + if (auto *BB = dyn_cast(I)) + return BB; + return cast(I)->getParent(); + }; + BasicBlock *BB1 = GetParent(I1); + BasicBlock *BB2 = GetParent(I2); + if (BB1 == BB2) { + // BasicBlock InsertionPt means the first non-phi instruction. + if (isa(I1)) + return I2; + if (isa(I2)) + return I1; + return cast(I1)->comesBefore(cast(I2)) ? I2 + : I1; + } + BasicBlock *PDomBB = PDT->findNearestCommonDominator(BB1, BB2); + if (!PDomBB) + return nullptr; + if (BB2 == PDomBB) + return I2; + if (BB1 == PDomBB) + return I1; + return PDomBB; +} + // Attempts to optimize the pattern whereby memory is copied from an alloca to // another alloca, where the two allocas don't have conflicting mod/ref. If // successful, the two allocas can be merged into one and the transfer can be @@ -1440,8 +1510,7 @@ return false; } - // 1. Check that copy is full. Calculate the static size of the allocas to be - // merged, bail out if we can't. + // Check that copy is full with static size. const DataLayout &DL = DestAlloca->getModule()->getDataLayout(); std::optional SrcSize = SrcAlloca->getAllocationSize(DL); if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) { @@ -1455,19 +1524,16 @@ return false; } - // 2-1. Check that src and dest are static allocas, which are not affected by - // stacksave/stackrestore. - if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() || - SrcAlloca->getParent() != Load->getParent() || - SrcAlloca->getParent() != Store->getParent()) + if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca()) return false; - // 2-2. Check that src and dest are never captured, unescaped allocas. Also - // collect lifetime markers first/last users in order to shrink wrap the - // lifetimes, and instructions with noalias metadata to remove them. + // Check that src and dest are never captured, unescaped allocas. Also + // find the nearest common dominator and postdominator for all users in + // order to shrink wrap the lifetimes, and instructions with noalias metadata + // to remove them. SmallVector LifetimeMarkers; - Instruction *FirstUser = nullptr, *LastUser = nullptr; + InsertionPt Dom = nullptr, PDom = nullptr; SmallSet NoAliasInstrs; // Recursively track the user and check whether modified alias exist. @@ -1505,12 +1571,13 @@ continue; case UseCaptureKind::NO_CAPTURE: { auto *UI = cast(U.getUser()); - if (DestAlloca->getParent() != UI->getParent()) - return false; - if (!FirstUser || UI->comesBefore(FirstUser)) - FirstUser = UI; - if (!LastUser || LastUser->comesBefore(UI)) - LastUser = UI; + if (!Dom) { + PDom = Dom = UI; + } else { + Dom = findNearestCommonDominator(Dom, UI, DT); + if (PDom) + PDom = findNearestCommonPostDominator(PDom, UI, PDT); + } if (UI->isLifetimeStartOrEnd()) { // We note the locations of these intrinsic calls so that we can // delete them later if the optimization succeeds, this is safe @@ -1534,37 +1601,64 @@ return true; }; - // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics, - // from the alloca to the Store. + // Check that dest has no Mod/Ref, from the alloca to the Store, except full + // size lifetime intrinsics. And collect modref inst for the reachability + // check. ModRefInfo DestModRef = ModRefInfo::NoModRef; MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size)); + SmallVector ReachabilityWorklist; auto DestModRefCallback = [&](Instruction *UI) -> bool { // We don't care about the store itself. if (UI == Store) return true; ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc); - // FIXME: For multi-BB cases, we need to see reachability from it to - // store. - // Bailout if Dest may have any ModRef before Store. - if (UI->comesBefore(Store) && isModOrRefSet(Res)) - return false; - DestModRef |= BAA.getModRefInfo(UI, DestLoc); + DestModRef |= Res; + if (isModOrRefSet(Res)) { + // Instructions reachability checks. + // FIXME: adding the Instruction version isPotentiallyReachableFromMany on + // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful. + if (UI->getParent() == Store->getParent()) { + // The same block case is special because it's the only time we're + // looking within a single block to see which instruction comes first. + // Once we start looking at multiple blocks, the first instruction of + // the block is reachable, so we only need to determine reachability + // between whole blocks. + BasicBlock *BB = UI->getParent(); + + // If A comes before B, then B is definitively reachable from A. + if (UI->comesBefore(Store)) + return false; + + // If the user's parent block is entry, no predecessor exists. + if (BB->isEntryBlock()) + return true; + // Otherwise, continue doing the normal per-BB CFG walk. + ReachabilityWorklist.append(succ_begin(BB), succ_end(BB)); + } else { + ReachabilityWorklist.push_back(UI->getParent()); + } + } return true; }; if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback)) return false; + // Bailout if Dest may have any ModRef before Store. + if (!ReachabilityWorklist.empty() && + isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(), + nullptr, DT, nullptr)) + return false; - // 3. Check that, from after the Load to the end of the BB, - // 3-1. if the dest has any Mod, src has no Ref, and - // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes. + // Check that, from after the Load to the end of the BB, + // - if the dest has any Mod, src has no Ref, and + // - if the dest has any Ref, src has no Mod except full-sized lifetimes. MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size)); auto SrcModRefCallback = [&](Instruction *UI) -> bool { - // Any ModRef before Load doesn't matter, also Load and Store can be - // ignored. - if (UI->comesBefore(Load) || UI == Load || UI == Store) + // Any ModRef post-dominated by Load doesn't matter, also Load and Store + // themselves can be ignored. + if (PDT->dominates(Load, UI) || UI == Load || UI == Store) return true; ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc); if ((isModSet(DestModRef) && isRefSet(Res)) || @@ -1596,22 +1690,48 @@ ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size); // Create a new lifetime start marker before the first user of src or alloca // users. - Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator()); - auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize); - auto *FirstMA = MSSA->getMemoryAccess(FirstUser); - auto *StartMA = MSSAU->createMemoryAccessBefore(Start, nullptr, FirstMA); + MemoryAccess *StartMA; + if (auto *DomI = dyn_cast_if_present(Dom)) { + Builder.SetInsertPoint(DomI->getParent(), DomI->getIterator()); + auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize); + StartMA = MSSAU->createMemoryAccessBefore(Start, nullptr, + MSSA->getMemoryAccess(DomI)); + } else { + auto *DomB = cast(Dom); + Builder.SetInsertPoint(DomB->getTerminator()); + auto *Start = Builder.CreateLifetimeStart(SrcAlloca, AllocaSize); + StartMA = MSSAU->createMemoryAccessInBB( + Start, nullptr, Start->getParent(), MemorySSA::BeforeTerminator); + } MSSAU->insertDef(cast(StartMA), /*RenameUses=*/true); // Create a new lifetime end marker after the last user of src or alloca - // users. - // FIXME: If the last user is the terminator for the bb, we can insert - // lifetime.end marker to the immidiate post-dominator, but currently do - // nothing. - if (!LastUser->isTerminator()) { - Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator()); - auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize); - auto *LastMA = MSSA->getMemoryAccess(LastUser); - auto *EndMA = MSSAU->createMemoryAccessAfter(End, nullptr, LastMA); + // users. If there's no such postdominator, just don't bother; we could + // create one at each exit block, but that'd be essentially semantically + // meaningless. + // If the PDom is the terminator (e.g. invoke), see the next immediate post + // dominator. + if (auto *PDomI = dyn_cast_if_present(PDom); + PDomI && PDomI->isTerminator()) { + auto *IPDomNode = (*PDT)[PDomI->getParent()]->getIDom(); + PDom = IPDomNode ? IPDomNode->getBlock() : nullptr; + } + if (PDom) { + MemoryAccess *EndMA; + if (auto *PDomI = dyn_cast(PDom)) { + // If PDom is Instruction ptr, insert after it, because it's a user of + // SrcAlloca. + Builder.SetInsertPoint(PDomI->getParent(), ++PDomI->getIterator()); + auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize); + EndMA = MSSAU->createMemoryAccessAfter(End, nullptr, + MSSA->getMemoryAccess(PDomI)); + } else { + auto *PDomB = cast(PDom); + Builder.SetInsertPoint(PDomB, PDomB->getFirstInsertionPt()); + auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize); + EndMA = MSSAU->createMemoryAccessInBB(End, nullptr, End->getParent(), + MemorySSA::Beginning); + } MSSAU->insertDef(cast(EndMA), /*RenameUses=*/true); } @@ -1999,9 +2119,10 @@ auto *AA = &AM.getResult(F); auto *AC = &AM.getResult(F); auto *DT = &AM.getResult(F); + auto *PDT = &AM.getResult(F); auto *MSSA = &AM.getResult(F); - bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA()); + bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA()); if (!MadeChange) return PreservedAnalyses::all(); @@ -2013,12 +2134,14 @@ bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_, AliasAnalysis *AA_, AssumptionCache *AC_, - DominatorTree *DT_, MemorySSA *MSSA_) { + DominatorTree *DT_, PostDominatorTree *PDT_, + MemorySSA *MSSA_) { bool MadeChange = false; TLI = TLI_; AA = AA_; AC = AC_; DT = DT_; + PDT = PDT_; MSSA = MSSA_; MemorySSAUpdater MSSAU_(MSSA_); MSSAU = &MSSAU_; diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -190,6 +190,7 @@ ; CHECK-O23SZ-NEXT: Running pass: GVNPass ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis ; CHECK-O1-NEXT: Running pass: MemCpyOptPass +; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SCCPPass ; CHECK-O-NEXT: Running pass: BDCEPass ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis @@ -201,7 +202,7 @@ ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O1-NEXT: Running pass: CoroElidePass ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis +; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -103,8 +103,8 @@ ; CHECK-O23SZ-NEXT: Running pass: GVNPass on foo ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis on foo ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass on foo -; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo ; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo +; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo ; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass on foo ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -125,6 +125,7 @@ ; CHECK-O23SZ-NEXT: Running pass: GVNPass ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis ; CHECK-O1-NEXT: Running pass: MemCpyOptPass +; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SCCPPass ; CHECK-O-NEXT: Running pass: BDCEPass ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis @@ -135,7 +136,7 @@ ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O1-NEXT: Running pass: CoroElidePass ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis +; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll @@ -157,6 +157,7 @@ ; CHECK-O23SZ-NEXT: Running pass: GVNPass ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis ; CHECK-O1-NEXT: Running pass: MemCpyOptPass +; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SCCPPass ; CHECK-O-NEXT: Running pass: BDCEPass ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis @@ -167,7 +168,7 @@ ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O1-NEXT: Running pass: CoroElidePass ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis +; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll --- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll +++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll @@ -406,24 +406,19 @@ ret void } -; TODO: merge allocas for bb-separated, but logically straight define void @multi_bb_memcpy(i1 %b) { ; CHECK-LABEL: define void @multi_bb_memcpy ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]]) ; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: br label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 4, i1 false) ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca i32, align 4 @@ -445,23 +440,17 @@ ret void } -; TODO: Merge alloca define void @multi_bb_load_store(i1 %b) { ; CHECK-LABEL: define void @multi_bb_load_store ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]]) ; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: [[SRC_VAL:%.*]] = load i32, ptr [[SRC]], align 4 -; CHECK-NEXT: store i32 [[SRC_VAL]], ptr [[DEST]], align 4 ; CHECK-NEXT: br label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca i32, align 4 @@ -525,28 +514,22 @@ ret void } -; TODO: merge allocas for multi basicblocks, s.t. all copy-dominated -; uses are satisfy the condition. define void @multi_bb_simple_br(i1 %b) { ; CHECK-LABEL: define void @multi_bb_simple_br ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -577,7 +560,7 @@ ; CHECK-LABEL: define void @multi_bb_dom_test0 ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 @@ -586,9 +569,8 @@ ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 40, i32 50, i32 60 }, ptr [[SRC]], align 4 ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -663,20 +645,18 @@ ; CHECK-LABEL: define void @multi_bb_pdom_test0 ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -707,19 +687,18 @@ ; CHECK-LABEL: define void @multi_bb_pdom_test1 ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 42, [[BB0]] ], [ 41, [[BB1]] ] +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -748,17 +727,15 @@ ; CHECK-LABEL: define void @multi_bb_pdom_test2 ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: ret void ; CHECK: unr1: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[UNR2:%.*]] ; CHECK: unr2: -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[UNR1:%.*]] ; %src = alloca %struct.Foo, align 4 @@ -786,19 +763,17 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[NLT1:%.*]] = icmp slt i32 [[N]], 1 ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 0, i32 1, i32 42 }, ptr [[SRC]], align 4 ; CHECK-NEXT: br i1 [[NLT1]], label [[LOOP_EXIT:%.*]], label [[LOOP_BODY:%.*]] ; CHECK: loop_body: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[NEW_I:%.*]], [[LOOP_BODY]] ], [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DEST]], ptr align 8 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], 1 ; CHECK-NEXT: store i32 [[NEW_I]], ptr [[SRC]], align 4 ; CHECK-NEXT: [[IGTN:%.*]] = icmp sgt i32 [[NEW_I]], [[N]] ; CHECK-NEXT: br i1 [[IGTN]], label [[LOOP_EXIT]], label [[LOOP_BODY]] ; CHECK: loop_exit: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; entry: @@ -822,14 +797,11 @@ ret void } -; TODO: merge allocas for multi basicblocks, s.t. some modref which is unreachable from copy exists. define void @multi_bb_unreachable_modref(i1 %b0) { ; CHECK-LABEL: define void @multi_bb_unreachable_modref ; CHECK-SAME: (i1 [[B0:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[EXIT:%.*]] @@ -837,9 +809,6 @@ ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: ret void ; CHECK: bb0: -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -861,26 +830,21 @@ ret void } -; TODO: merge allocas for multi basicblocks, s.t. memcpy doesn't dominate the uses. define void @multi_bb_non_dominated(i1 %b0, i1 %b1) { ; CHECK-LABEL: define void @multi_bb_non_dominated ; CHECK-SAME: (i1 [[B0:%.*]], i1 [[B1:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4