diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -34,6 +34,7 @@ class MemorySSA; class MemorySSAUpdater; class MemSetInst; +class PostDominatorTree; class StoreInst; class TargetLibraryInfo; class Value; @@ -43,6 +44,7 @@ AAResults *AA = nullptr; AssumptionCache *AC = nullptr; DominatorTree *DT = nullptr; + PostDominatorTree *PDT = nullptr; MemorySSA *MSSA = nullptr; MemorySSAUpdater *MSSAU = nullptr; @@ -53,7 +55,8 @@ // Glue for the old PM. bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA, - AssumptionCache *AC, DominatorTree *DT, MemorySSA *MSSA); + AssumptionCache *AC, DominatorTree *DT, PostDominatorTree *PDT, + MemorySSA *MSSA); private: // Helper functions diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -19,12 +19,14 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" @@ -1440,8 +1442,7 @@ return false; } - // 1. Check that copy is full. Calculate the static size of the allocas to be - // merged, bail out if we can't. + // Check that copy is full with static size. 
const DataLayout &DL = DestAlloca->getModule()->getDataLayout(); std::optional SrcSize = SrcAlloca->getAllocationSize(DL); if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) { @@ -1455,19 +1456,15 @@ return false; } - // 2-1. Check that src and dest are static allocas, which are not affected by - // stacksave/stackrestore. - if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() || - SrcAlloca->getParent() != Load->getParent() || - SrcAlloca->getParent() != Store->getParent()) + if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca()) return false; - // 2-2. Check that src and dest are never captured, unescaped allocas. Also - // collect lifetime markers first/last users in order to shrink wrap the - // lifetimes, and instructions with noalias metadata to remove them. + // Check that src and dest are never captured, unescaped allocas. Also + // find the nearest common dominator and postdominator for all users in + // order to shrink wrap the lifetimes, and instructions with noalias metadata + // to remove them. SmallVector LifetimeMarkers; - Instruction *FirstUser = nullptr, *LastUser = nullptr; SmallSet NoAliasInstrs; // Recursively track the user and check whether modified alias exist. @@ -1505,12 +1502,6 @@ continue; case UseCaptureKind::NO_CAPTURE: { auto *UI = cast(U.getUser()); - if (DestAlloca->getParent() != UI->getParent()) - return false; - if (!FirstUser || UI->comesBefore(FirstUser)) - FirstUser = UI; - if (!LastUser || LastUser->comesBefore(UI)) - LastUser = UI; if (UI->isLifetimeStartOrEnd()) { // We note the locations of these intrinsic calls so that we can // delete them later if the optimization succeeds, this is safe @@ -1534,37 +1525,64 @@ return true; }; - // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics, - // from the alloca to the Store. + // Check that dest has no Mod/Ref, from the alloca to the Store, except full + // size lifetime intrinsics. 
Also collect ModRef instructions for the + // reachability check. ModRefInfo DestModRef = ModRefInfo::NoModRef; MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size)); + SmallVector ReachabilityWorklist; auto DestModRefCallback = [&](Instruction *UI) -> bool { // We don't care about the store itself. if (UI == Store) return true; ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc); - // FIXME: For multi-BB cases, we need to see reachability from it to - // store. - // Bailout if Dest may have any ModRef before Store. - if (UI->comesBefore(Store) && isModOrRefSet(Res)) - return false; - DestModRef |= BAA.getModRefInfo(UI, DestLoc); + DestModRef |= Res; + if (isModOrRefSet(Res)) { + // Instruction reachability checks. + // FIXME: An Instruction version of isPotentiallyReachableFromMany in + // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful. + if (UI->getParent() == Store->getParent()) { + // The same block case is special because it's the only time we're + // looking within a single block to see which instruction comes first. + // Once we start looking at multiple blocks, the first instruction of + // the block is reachable, so we only need to determine reachability + // between whole blocks. + BasicBlock *BB = UI->getParent(); + + // If UI comes before Store, then Store is reachable from UI. + if (UI->comesBefore(Store)) + return false; + + // If the user's parent block is entry, no predecessor exists. + if (BB->isEntryBlock()) + return true; + // Otherwise, continue doing the normal per-BB CFG walk. + ReachabilityWorklist.append(succ_begin(BB), succ_end(BB)); + } else { + ReachabilityWorklist.push_back(UI->getParent()); + } + } return true; }; if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback)) return false; + // Bailout if Dest may have any ModRef before Store. + if (!ReachabilityWorklist.empty() && + isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(), + nullptr, DT, nullptr)) + return false; - // 3.
Check that, from after the Load to the end of the BB, - // 3-1. if the dest has any Mod, src has no Ref, and - // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes. + // Check that, from after the Load to the end of the BB, + // - if the dest has any Mod, src has no Ref, and + // - if the dest has any Ref, src has no Mod except full-sized lifetimes. MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size)); auto SrcModRefCallback = [&](Instruction *UI) -> bool { - // Any ModRef before Load doesn't matter, also Load and Store can be - // ignored. - if (UI->comesBefore(Load) || UI == Load || UI == Store) + // Any ModRef post-dominated by Load doesn't matter, also Load and Store + // themselves can be ignored. + if (PDT->dominates(Load, UI) || UI == Load || UI == Store) return true; ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc); if ((isModSet(DestModRef) && isRefSet(Res)) || @@ -1588,34 +1606,10 @@ // Drop metadata on the source alloca. SrcAlloca->dropUnknownNonDebugMetadata(); - // Do "shrink wrap" the lifetimes, if the original lifetime intrinsics exists. + // TODO: Reconstruct merged lifetime markers. + // Remove all lifetime markers, if the original lifetime intrinsics + // exist. if (!LifetimeMarkers.empty()) { - LLVMContext &C = SrcAlloca->getContext(); - IRBuilder<> Builder(C); - - ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size); - // Create a new lifetime start marker before the first user of src or alloca - // users.
- // FIXME: If the last user is the terminator for the bb, we can insert - // lifetime.end marker to the immidiate post-dominator, but currently do - // nothing. - if (!LastUser->isTerminator()) { - Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator()); - auto *End = Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize); - auto *LastMA = MSSA->getMemoryAccess(LastUser); - auto *EndMA = MSSAU->createMemoryAccessAfter(End, nullptr, LastMA); - MSSAU->insertDef(cast(EndMA), /*RenameUses=*/true); - } - - // Remove all other lifetime markers. for (Instruction *I : LifetimeMarkers) eraseInstruction(I); } @@ -1999,9 +1993,10 @@ auto *AA = &AM.getResult(F); auto *AC = &AM.getResult(F); auto *DT = &AM.getResult(F); + auto *PDT = &AM.getResult(F); auto *MSSA = &AM.getResult(F); - bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA()); + bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA()); if (!MadeChange) return PreservedAnalyses::all(); @@ -2013,12 +2008,14 @@ bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_, AliasAnalysis *AA_, AssumptionCache *AC_, - DominatorTree *DT_, MemorySSA *MSSA_) { + DominatorTree *DT_, PostDominatorTree *PDT_, + MemorySSA *MSSA_) { bool MadeChange = false; TLI = TLI_; AA = AA_; AC = AC_; DT = DT_; + PDT = PDT_; MSSA = MSSA_; MemorySSAUpdater MSSAU_(MSSA_); MSSAU = &MSSAU_; diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -190,6 +190,7 @@ ; CHECK-O23SZ-NEXT: Running pass: GVNPass ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis ; CHECK-O1-NEXT: Running pass: MemCpyOptPass +; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SCCPPass ; CHECK-O-NEXT: Running pass: BDCEPass ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis @@ -201,7 +202,7 @@ ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O1-NEXT: 
Running pass: CoroElidePass ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis +; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -103,8 +103,8 @@ ; CHECK-O23SZ-NEXT: Running pass: GVNPass on foo ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis on foo ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass on foo -; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo ; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo +; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo ; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass on foo ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -125,6 +125,7 @@ ; CHECK-O23SZ-NEXT: Running pass: GVNPass ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis ; CHECK-O1-NEXT: Running pass: MemCpyOptPass +; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SCCPPass ; CHECK-O-NEXT: Running pass: BDCEPass ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis @@ -135,7 +136,7 @@ ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O1-NEXT: Running pass: CoroElidePass ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis +; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O23SZ-NEXT: Running pass: 
MemCpyOptPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass on foo diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll @@ -157,6 +157,7 @@ ; CHECK-O23SZ-NEXT: Running pass: GVNPass ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis ; CHECK-O1-NEXT: Running pass: MemCpyOptPass +; CHECK-O1-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SCCPPass ; CHECK-O-NEXT: Running pass: BDCEPass ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis @@ -167,7 +168,7 @@ ; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O1-NEXT: Running pass: CoroElidePass ; CHECK-O-NEXT: Running pass: ADCEPass -; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis +; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Running pass: MoveAutoInitPass diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll b/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll --- a/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll +++ b/llvm/test/Transforms/MemCpyOpt/lifetime-missing.ll @@ -15,11 +15,9 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AGG_TMP_SROA_14:%.*]] = alloca [20 x i8], align 4 ; CHECK-NEXT: [[AGG_TMP_SROA_14_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr [[AGG_TMP_SROA_14]]) ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[AGG_TMP_SROA_14_128_SROA_IDX]], i8 0, i64 1, i1 false) ; CHECK-NEXT: [[AGG_TMP3_SROA_35_128_SROA_IDX:%.*]] = getelementptr i8, ptr [[AGG_TMP_SROA_14]], i64 4 ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr inttoptr (i64 4 to ptr), i8 0, i64 1, i1 false) -; CHECK-NEXT: call void 
@llvm.lifetime.end.p0(i64 20, ptr [[AGG_TMP_SROA_14]]) ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr null, i8 0, i64 1, i1 false) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll --- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll +++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll @@ -24,11 +24,9 @@ define void @basic_memcpy() { ; CHECK-LABEL: define void @basic_memcpy() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -50,11 +48,9 @@ define void @basic_memmove() { ; CHECK-LABEL: define void @basic_memmove() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -77,11 +73,9 @@ define void @load_store() { ; CHECK-LABEL: define void @load_store() { ; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]]) ; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void 
@llvm.lifetime.end.p0(i64 4, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca i32, align 4 @@ -104,11 +98,9 @@ define void @align_up() { ; CHECK-LABEL: define void @align_up() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -131,12 +123,10 @@ define void @remove_extra_lifetime_intrinsics() { ; CHECK-LABEL: define void @remove_extra_lifetime_intrinsics() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -185,13 +175,11 @@ define void @alias_no_mod() { ; CHECK-LABEL: define void @alias_no_mod() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: [[DEST_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[SRC]], i32 0, i32 0 ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[SRC_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[SRC]], i32 0, i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 
@use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -224,11 +212,9 @@ define void @remove_scoped_noalias() { ; CHECK-LABEL: define void @remove_scoped_noalias() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -250,11 +236,9 @@ define void @remove_alloca_metadata() { ; CHECK-LABEL: define void @remove_alloca_metadata() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4, !annotation !3 @@ -277,11 +261,9 @@ define void @noalias_on_lifetime() { ; CHECK-LABEL: define void @noalias_on_lifetime() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr 
[[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -303,11 +285,9 @@ define void @src_ref_dest_ref_after_copy() { ; CHECK-LABEL: define void @src_ref_dest_ref_after_copy() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_readonly(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_readonly(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -329,11 +309,9 @@ define void @src_mod_dest_mod_after_copy() { ; CHECK-LABEL: define void @src_mod_dest_mod_after_copy() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_writeonly(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_writeonly(ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -354,10 +332,8 @@ define void @avoid_memory_use_last_user_crash() { ; CHECK-LABEL: define void @avoid_memory_use_last_user_crash() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[SRC]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -375,7 +351,6 @@ define void @terminator_lastuse() personality i32 0 { ; CHECK-LABEL: define void @terminator_lastuse() personality i32 0 { ; 
CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[RV:%.*]] = invoke i32 @use_nocapture(ptr [[SRC]]) @@ -406,24 +381,17 @@ ret void } -; TODO: merge allocas for bb-separated, but logically straight define void @multi_bb_memcpy(i1 %b) { ; CHECK-LABEL: define void @multi_bb_memcpy ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]]) ; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: br label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 4, i1 false) ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca i32, align 4 @@ -445,23 +413,15 @@ ret void } -; TODO: Merge alloca define void @multi_bb_load_store(i1 %b) { ; CHECK-LABEL: define void @multi_bb_load_store ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]]) ; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4 
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) -; CHECK-NEXT: [[SRC_VAL:%.*]] = load i32, ptr [[SRC]], align 4 -; CHECK-NEXT: store i32 [[SRC_VAL]], ptr [[DEST]], align 4 ; CHECK-NEXT: br label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca i32, align 4 @@ -525,28 +485,20 @@ ret void } -; TODO: merge allocas for multi basicblocks, s.t. all copy-dominated -; uses are satisfy the condition. define void @multi_bb_simple_br(i1 %b) { ; CHECK-LABEL: define void @multi_bb_simple_br ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, 
ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -577,7 +529,6 @@ ; CHECK-LABEL: define void @multi_bb_dom_test0 ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 ; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 @@ -586,9 +537,7 @@ ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 40, i32 50, i32 60 }, ptr [[SRC]], align 4 ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -663,20 +612,16 @@ ; CHECK-LABEL: define void @multi_bb_pdom_test0 ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) 
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -707,16 +652,13 @@ ; CHECK-LABEL: define void @multi_bb_pdom_test1 ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 42, [[BB0]] ], [ 41, [[BB1]] ] @@ -748,17 +690,14 @@ ; CHECK-LABEL: define void @multi_bb_pdom_test2 ; CHECK-SAME: (i1 [[B:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 
[[SRC]], i64 12, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: ret void ; CHECK: unr1: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[UNR2:%.*]] ; CHECK: unr2: -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[UNR1:%.*]] ; %src = alloca %struct.Foo, align 4 @@ -786,14 +725,10 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[NLT1:%.*]] = icmp slt i32 [[N]], 1 ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 0, i32 1, i32 42 }, ptr [[SRC]], align 4 ; CHECK-NEXT: br i1 [[NLT1]], label [[LOOP_EXIT:%.*]], label [[LOOP_BODY:%.*]] ; CHECK: loop_body: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[NEW_I:%.*]], [[LOOP_BODY]] ], [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DEST]], ptr align 8 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], 1 ; CHECK-NEXT: store i32 [[NEW_I]], ptr [[SRC]], align 4 ; CHECK-NEXT: [[IGTN:%.*]] = icmp sgt i32 [[NEW_I]], [[N]] @@ -822,14 +757,10 @@ ret void } -; TODO: merge allocas for multi basicblocks, s.t. some modref which is unreachable from copy exists. 
define void @multi_bb_unreachable_modref(i1 %b0) { ; CHECK-LABEL: define void @multi_bb_unreachable_modref ; CHECK-SAME: (i1 [[B0:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[EXIT:%.*]] @@ -837,9 +768,6 @@ ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: ret void ; CHECK: bb0: -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -861,26 +789,19 @@ ret void } -; TODO: merge allocas for multi basicblocks, s.t. memcpy doesn't dominate the uses. 
define void @multi_bb_non_dominated(i1 %b0, i1 %b1) { ; CHECK-LABEL: define void @multi_bb_non_dominated ; CHECK-SAME: (i1 [[B0:%.*]], i1 [[B1:%.*]]) { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb2: -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4