Index: llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -20,6 +20,7 @@ namespace llvm { class AAResults; +class AllocaInst; class BatchAAResults; class AssumptionCache; class CallBase; @@ -77,6 +78,9 @@ Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, Value *ByteVal); bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI); + bool performStackMoveOptzn(Instruction *Load, Instruction *Store, + AllocaInst *DestAlloca, AllocaInst *SrcAlloca, + uint64_t Size, BatchAAResults &BAA); void eraseInstruction(Instruction *I); bool iterateOnFunction(Function &F); Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -69,6 +69,7 @@ STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); STATISTIC(NumCallSlot, "Number of call slot optimizations performed"); +STATISTIC(NumStackMove, "Number of stack-move optimizations performed"); namespace { @@ -730,6 +731,23 @@ return true; } + // If this is a load-store pair from a stack slot to a stack slot, we + // might be able to perform the stack-move optimization just as we do for + // memcpys from an alloca to an alloca. + if (auto *DestAlloca = dyn_cast(SI->getPointerOperand())) { + if (auto *SrcAlloca = dyn_cast(LI->getPointerOperand())) { + if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca, + DL.getTypeStoreSize(T), BAA)) { + // Avoid invalidating the iterator. 
+ BBI = SI->getNextNonDebugInstruction()->getIterator(); + eraseInstruction(SI); + eraseInstruction(LI); + ++NumMemCpyInstr; + return true; + } + } + } + return false; } @@ -1407,6 +1425,214 @@ return true; } +// Attempts to optimize the pattern whereby memory is copied from an alloca to +// another alloca, where the two allocas doesn't have conflicting mod/ref. If +// successful, the two allocas can be merged into one and the transfer can be +// deleted. This pattern is generated frequently in Rust, due to the ubiquity of +// move operations in that language. +// +// Once we determine that the optimization is safe to perform, we replace all +// uses of the destination alloca with the source alloca. We also "shrink wrap" +// the lifetime markers of the single merged alloca to the nearest dominating +// and postdominating basic block. Note that the "shrink wrapping" procedure is +// a safe transformation only because we restrict the scope of this optimization +// to allocas that aren't captured. +bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, + AllocaInst *DestAlloca, + AllocaInst *SrcAlloca, uint64_t Size, + BatchAAResults &BAA) { + LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n" + << *Store << "\n"); + + // TODO: implement multi BasicBlock transformation, we need to use + // PostDominator and we would be able to relax the condition for each blocks. + if (SrcAlloca->getParent() != DestAlloca->getParent()) { + LLVM_DEBUG(dbgs() << "Stack Move: src and dest allocas are not in the " + "single basic block\n"); + return false; + } + + // Make sure the two allocas are in the same address space. + if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) { + LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n"); + return false; + } + + // 1. Check that copy is full. Calculate the static size of the allocas to be + // merged, bail out if we can't. 
+ const DataLayout &DL = DestAlloca->getModule()->getDataLayout(); + std::optional SrcSize = SrcAlloca->getAllocationSize(DL); + if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) { + LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n"); + return false; + } + std::optional DestSize = DestAlloca->getAllocationSize(DL); + if (!DestSize || DestSize->isScalable() || + Size != DestSize->getFixedValue()) { + LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n"); + return false; + } + + // 2-1. Check that src and dest are static allocas, which is irrelevant to + // stacksave/stackrestore. + if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca()) + return false; + + // 2-2. Check that src and dest are never captured, unescaped allocas. Also + // collect lifetime markers first/last users for shrink wrap the lifetimes, + // and instructions with noalias metadata to remove them. + + SmallVector LifetimeMarkers; + Instruction *FirstUser = nullptr, *LastUser = nullptr; + SmallSet NoAliasInstrs; + + // Recursively track the user and check whether modified alias exist. + auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool { + // An inbounds GEP can either be a valid pointer (pointing into + // or to the end of an allocation), or be null in the default + // address space. So for an inbounds GEP there is no way to let + // the pointer escape using clever GEP hacking because doing so + // would make the pointer point outside of the allocated object + // and thus make the GEP result a poison value. Similarly, other + // dereferenceable pointers cannot be manipulated without producing + // poison. 
+ if (auto *GEP = dyn_cast(V)) + if (GEP->isInBounds()) + return true; + bool CanBeNull, CanBeFreed; + return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed); + }; + SmallVector Worklist; + Worklist.push_back(DestAlloca); + auto CaptureTrackingWithModRef = + [&](function_ref ModRefCallback) -> bool { + while (!Worklist.empty()) { + Instruction *I = Worklist.back(); + Worklist.pop_back(); + for (const Use &U : I->uses()) { + switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) { + case UseCaptureKind::MAY_CAPTURE: + return false; + continue; + case UseCaptureKind::PASSTHROUGH: + // Instructions cannot have non-instruction users. + Worklist.push_back(cast(U.getUser())); + continue; + case UseCaptureKind::NO_CAPTURE: { + auto *UI = cast(U.getUser()); + if (DestAlloca->getParent() != UI->getParent()) + return false; + if (!FirstUser || UI->comesBefore(FirstUser)) + FirstUser = UI; + if (!LastUser || LastUser->comesBefore(UI)) + LastUser = UI; + if (UI->hasMetadata(LLVMContext::MD_noalias)) + NoAliasInstrs.insert(UI); + if (UI->isLifetimeStartOrEnd()) { + // We note these locations of these intrinsic calls so that we can + // delete them later if the optimization succeeds, this is safe + // since both llvm.lifetime.start and llvm.lifetime.end intrinsics + // conceptually fill all the bytes of the alloca with an undefined + // value. + int64_t Size = cast(UI->getOperand(0))->getSExtValue(); + if (Size < 0 || Size == DestSize) { + LifetimeMarkers.push_back(UI); + continue; + } + } + if (!ModRefCallback(UI)) + return false; + } + } + } + } + return true; + }; + + // 3. Check that dest has no Mod, except full size lifetime intrinsics, from + // the alloca to the Store. + bool DestMod = false, DestRef = false; + MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size)); + auto DestModRefCallback = [&](Instruction *UI) -> bool { + // We don't care about the store itself. 
+ if (UI == Store) + return true; + ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc); + // FIXME: For multi-BB cases, we need to see reachability from it to + // store. + // Bailout if Dest may be modified before Store. + if (UI->comesBefore(Store) && isModSet(Res)) + return false; + DestMod |= isModSet(Res); + DestRef |= isRefSet(Res); + return true; + }; + + if (!CaptureTrackingWithModRef(DestModRefCallback)) + return false; + + // 3. Check that, from the after the Load to the end of the BB, + // 3-1. if the dest has any Mod, src has no Ref, and + // 3-2. if the dest has any Ref, src has no Mod except full-sized lifetimes. + MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size)); + + auto SrcModRefCallback = [&](Instruction *UI) -> bool { + // Any ModRef before Load doesn't matter, also Load and Store can be + // ignored. + if (UI->comesBefore(Load) || UI == Load || UI == Store) + return true; + ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc); + if ((DestMod && isRefSet(Res)) || (DestRef && isModSet(Res))) + return false; + + return true; + }; + + Worklist.push_back(SrcAlloca); + if (!CaptureTrackingWithModRef(SrcModRefCallback)) + return false; + + // We can do the transformation. First, align the allocas appropriately. + SrcAlloca->setAlignment( + std::max(SrcAlloca->getAlign(), DestAlloca->getAlign())); + + // Merge the two allocas. + DestAlloca->replaceAllUsesWith(SrcAlloca); + + // Drop metadata on the source alloca. + SrcAlloca->dropUnknownNonDebugMetadata(); + + // Do "shrink wrap" the lifetimes. + LLVMContext &C = SrcAlloca->getContext(); + IRBuilder<> Builder(C); + + ConstantInt *AllocaSize = + cast(ConstantInt::get(Type::getInt64Ty(C), Size)); + // Create a new lifetime start marker before the first user of src or alloca + // users. + Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator()); + Builder.CreateLifetimeStart(SrcAlloca, AllocaSize); + + // Create a new lifetime end marker after the last user of src or alloca + // users. 
+ Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator()); + Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize); + + // Remove all other lifetime markers. + for (Instruction *I : LifetimeMarkers) + eraseInstruction(I); + + // As this transformation can cause memory accesses that didn't previously + // alias to begin to alias one another, we remove !noalias metadata from any + // uses of either alloca. This is conservative, but more precision doesn't + // seem worthwhile right now. + for (Instruction *I : NoAliasInstrs) + I->setMetadata(LLVMContext::MD_noalias, nullptr); + + LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n"); + NumStackMove++; + return true; +} /// Perform simplification of memcpy's. If we have memcpy A /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite @@ -1464,13 +1690,14 @@ MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess( AnyClobber, MemoryLocation::getForSource(M), BAA); - // There are four possible optimizations we can do for memcpy: + // There are five possible optimizations we can do for memcpy: // a) memcpy-memcpy xform which exposes redundance for DSE. // b) call-memcpy xform for return slot optimization. // c) memcpy from freshly alloca'd space or space that has just started // its lifetime copies undefined data, and we can therefore eliminate // the memcpy in favor of the data that was already at the destination. // d) memcpy from a just-memset'd source can be turned into memset. + // e) elimination of memcpy via stack-move optimization. 
if (auto *MD = dyn_cast(SrcClobber)) { if (Instruction *MI = MD->getMemoryInst()) { if (auto *CopySize = dyn_cast(M->getLength())) { @@ -1489,7 +1716,8 @@ } } if (auto *MDep = dyn_cast(MI)) - return processMemCpyMemCpyDependence(M, MDep, BAA); + if (processMemCpyMemCpyDependence(M, MDep, BAA)) + return true; if (auto *MDep = dyn_cast(MI)) { if (performMemCpyToMemSetOptzn(M, MDep, BAA)) { LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n"); @@ -1508,6 +1736,27 @@ } } + // If the transfer is from a stack slot to a stack slot, then we may be able + // to perform the stack-move optimization. See the comments in + // performStackMoveOptzn() for more details. + auto *DestAlloca = dyn_cast(M->getDest()); + if (!DestAlloca) + return false; + auto *SrcAlloca = dyn_cast(M->getSource()); + if (!SrcAlloca) + return false; + ConstantInt *Len = dyn_cast(M->getLength()); + if (Len == nullptr) + return false; + if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(), + BAA)) { + // Avoid invalidating the iterator. 
+ BBI = M->getNextNonDebugInstruction()->getIterator(); + eraseInstruction(M); + ++NumMemCpyInstr; + return true; + } + return false; } Index: llvm/test/Transforms/MemCpyOpt/callslot.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/callslot.ll +++ llvm/test/Transforms/MemCpyOpt/callslot.ll @@ -58,9 +58,10 @@ ; CHECK-LABEL: @write_src_between_call_and_memcpy( ; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[SRC]]) ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 16, i1 false) ; CHECK-NEXT: store i8 1, ptr [[SRC]], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST]], ptr [[SRC]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] @@ -75,7 +76,7 @@ ; CHECK-LABEL: @throw_between_call_and_mempy( ; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 16, i1 false) -; CHECK-NEXT: call void @may_throw() #[[ATTR2:[0-9]+]] +; CHECK-NEXT: call void @may_throw() #[[ATTR3:[0-9]+]] ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DEST_I8:%.*]], i8 0, i64 16, i1 false) ; CHECK-NEXT: ret void ; @@ -91,7 +92,7 @@ ; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i8], align 1 ; CHECK-NEXT: [[DEST_I8:%.*]] = getelementptr [16 x i8], ptr [[DEST]], i64 0, i64 8 -; CHECK-NEXT: call void @accept_ptr(ptr [[DEST_I8]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: call void @accept_ptr(ptr [[DEST_I8]]) #[[ATTR4:[0-9]+]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] @@ -123,7 +124,7 @@ ; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i8], align 1 ; CHECK-NEXT: [[DEST_I8:%.*]] = getelementptr [16 x i8], ptr [[DEST]], i64 0, i64 8 -; CHECK-NEXT: call 
void @accept_ptr(ptr [[DEST_I8]]) #[[ATTR3]] +; CHECK-NEXT: call void @accept_ptr(ptr [[DEST_I8]]) #[[ATTR4]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] @@ -139,7 +140,7 @@ ; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: call void @accept_ptr(ptr [[DEST]]) -; CHECK-NEXT: call void @accept_ptr(ptr nocapture [[DEST]]) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: call void @accept_ptr(ptr nocapture [[DEST]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] @@ -155,7 +156,7 @@ ; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: call void @accept_ptr(ptr [[DEST]]) -; CHECK-NEXT: call void @accept_ptr(ptr nocapture [[DEST]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @accept_ptr(ptr nocapture [[DEST]]) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] @@ -172,7 +173,7 @@ ; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: call void @accept_ptr(ptr [[DEST]]) -; CHECK-NEXT: call void @accept_ptr(ptr nocapture [[DEST]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @accept_ptr(ptr nocapture [[DEST]]) #[[ATTR7:[0-9]+]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] @@ -194,7 +195,7 @@ ; CHECK-NEXT: call void @accept_ptr(ptr [[DEST]]) ; CHECK-NEXT: ret void ; CHECK: nocaptures: -; CHECK-NEXT: call void @accept_ptr(ptr [[DEST]]) #[[ATTR3]] +; CHECK-NEXT: call void @accept_ptr(ptr [[DEST]]) #[[ATTR4]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] @@ -214,7 +215,7 @@ define void @source_alignment(ptr noalias dereferenceable(128) %dst) { ; CHECK-LABEL: @source_alignment( ; CHECK-NEXT: [[SRC:%.*]] = alloca [128 x i8], align 4 -; CHECK-NEXT: call void @accept_ptr(ptr nocapture [[DST:%.*]]) #[[ATTR3]] +; CHECK-NEXT: call void @accept_ptr(ptr nocapture [[DST:%.*]]) #[[ATTR4]] ; CHECK-NEXT: ret void ; %src = alloca [128 x i8], align 4 
Index: llvm/test/Transforms/MemCpyOpt/memcpy.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -598,8 +598,11 @@ define void @immut_param_enforced_alignment() { ; CHECK-LABEL: @immut_param_enforced_alignment( ; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr [[VAL]]) ; CHECK-NEXT: store i32 42, ptr [[VAL]], align 4 +; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 ; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr [[VAL]]) ; CHECK-NEXT: ret void ; %val = alloca i8, align 1 @@ -646,8 +649,11 @@ define void @immut_unescaped_alloca() { ; CHECK-LABEL: @immut_unescaped_alloca( ; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr [[VAL]]) ; CHECK-NEXT: store i32 42, ptr [[VAL]], align 4 +; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 ; CHECK-NEXT: call void @f_full_readonly(ptr [[VAL]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr [[VAL]]) ; CHECK-NEXT: ret void ; %val = alloca i8, align 4 Index: llvm/test/Transforms/MemCpyOpt/stack-move.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/stack-move.ll +++ llvm/test/Transforms/MemCpyOpt/stack-move.ll @@ -20,19 +20,15 @@ declare i32 @use_readonly(ptr noundef readonly) declare i32 @use_writeonly(ptr noundef) memory(write) -; TODO: Merge alloca and remove memcpy. 
define void @basic_memcpy() { ; CHECK-LABEL: define void @basic_memcpy() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -51,19 +47,15 @@ ret void } -; TODO: Merge alloca and remove memmove. 
define void @basic_memmove() { ; CHECK-LABEL: define void @basic_memmove() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -82,21 +74,16 @@ ret void } -; TODO: Merge alloca and remove load/store. ; Tests that the optimization succeeds with a load/store pair. 
define void @load_store() { ; CHECK-LABEL: define void @load_store() { ; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SRC]]) ; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: [[SRC_VAL:%.*]] = load i32, ptr [[SRC]], align 4 -; CHECK-NEXT: store i32 [[SRC_VAL]], ptr [[DEST]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca i32, align 4 @@ -115,20 +102,16 @@ ret void } -; TODO: Merge alloca. ; Tests that merging two allocas shouldn't be more poisonous, smaller aligned src is valid. 
define void @align_up() { ; CHECK-LABEL: define void @align_up() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 8 @@ -146,26 +129,18 @@ ret void } -; TODO: Merge alloca and remove memcpy, shrinkwrap lifetimes. ; Tests that we correctly remove extra lifetime intrinsics when performing the ; optimization. 
define void @remove_extra_lifetime_intrinsics() {
; CHECK-LABEL: define void @remove_extra_lifetime_intrinsics() {
; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]])
 ; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
 ; CHECK-NEXT:    ret void
 ;
   %src = alloca %struct.Foo, align 4
@@ -188,22 +163,18 @@
   ret void
 }
 
-; TODO: Merge alloca and remove memcpy.
 ; Tests that aliasing src or dest but no modification doesn't prevent transformations.
define void @alias_no_mod() { ; CHECK-LABEL: define void @alias_no_mod() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) -; CHECK-NEXT: [[DEST_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[DEST]], i32 0, i32 0 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) +; CHECK-NEXT: [[DEST_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[SRC]], i32 0, i32 0 ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: [[SRC_ALIAS:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[SRC]], i32 0, i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -232,20 +203,16 @@ !3 = !{!"Whatever"} -; TODO: Merge alloca and remove memcpy, remove noalias metadata on src. ; Tests that we remove scoped noalias metadata from a call. 
define void @remove_scoped_noalias() { ; CHECK-LABEL: define void @remove_scoped_noalias() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]), !alias.scope !0 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]), !noalias !0 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -263,20 +230,16 @@ ret void } -; TODO: Merge alloca and remove memcpy, remove noalias metadata on src. ; Tests that we remove metadata on the merged alloca. 
define void @remove_alloca_metadata() { ; CHECK-LABEL: define void @remove_alloca_metadata() { -; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4, !annotation !3 +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]), !alias.scope !0 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]), !noalias !0 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4, !annotation !3 @@ -294,20 +257,16 @@ ret void } -; TODO: Merge alloca and remove memcpy. ; Tests that we can merge alloca if the dest and src has only refs except lifetime intrinsics. 
define void @src_ref_dest_ref_after_copy() { ; CHECK-LABEL: define void @src_ref_dest_ref_after_copy() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_readonly(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_readonly(ptr nocapture noundef [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_readonly(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -330,14 +289,11 @@ ; CHECK-LABEL: define void @src_mod_dest_mod_after_copy() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[SRC]]) ; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_writeonly(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_writeonly(ptr nocapture noundef [[DEST]]) -; 
CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_writeonly(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr [[SRC]])
 ; CHECK-NEXT:    ret void
 ;
   %src = alloca %struct.Foo, align 4
@@ -601,7 +557,6 @@
   ret void
 }
 
-; TODO: Prevent this transformation
 ; Tests failure because copy semantics will change if dest is replaced with src.
 define void @mod_dest_before_copy() {
 ; CHECK-LABEL: define void @mod_dest_before_copy() {
@@ -634,7 +589,6 @@
   ret void
 }
 
-; TODO: Prevent transformations
 define void @mod_src_before_store_after_load() {
 ; CHECK-LABEL: define void @mod_src_before_store_after_load() {
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4