Index: llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -20,6 +20,7 @@ namespace llvm { class AAResults; +class AllocaInst; class BatchAAResults; class AssumptionCache; class CallBase; @@ -77,6 +78,9 @@ Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, Value *ByteVal); bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI); + bool performStackMoveOptzn(Instruction *Load, Instruction *Store, + AllocaInst *DestAlloca, AllocaInst *SrcAlloca, + uint64_t Size); void eraseInstruction(Instruction *I); bool iterateOnFunction(Function &F); Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -69,6 +69,7 @@ STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); STATISTIC(NumCallSlot, "Number of call slot optimizations performed"); +STATISTIC(NumStackMove, "Number of stack-move optimizations performed"); namespace { @@ -730,6 +731,23 @@ return true; } + // If this is a load-store pair from a stack slot to a stack slot, we + // might be able to perform the stack-move optimization just as we do for + // memcpys from an alloca to an alloca. + if (auto *DestAlloca = dyn_cast(SI->getPointerOperand())) { + if (auto *SrcAlloca = dyn_cast(LI->getPointerOperand())) { + if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca, + DL.getTypeStoreSize(T))) { + // Avoid invalidating the iterator. 
+        BBI = SI->getNextNonDebugInstruction()->getIterator();
+        eraseInstruction(SI);
+        eraseInstruction(LI);
+        ++NumMemCpyInstr;
+        return true;
+      }
+    }
+  }
+
   return false;
 }
@@ -1407,6 +1425,91 @@
   return true;
 }
+/// Try to merge \p DestAlloca into \p SrcAlloca when \p Size bytes are copied
+/// from the latter to the former, either by a load/store pair (\p Load and
+/// \p Store) or by a single transfer instruction passed as both arguments.
+///
+/// The merge is performed only if both allocas live in the same address
+/// space, the copy covers each alloca entirely, neither alloca may be
+/// captured, and the source is not accessed (other than by lifetime
+/// intrinsics) in the remainder of the basic block containing \p Store.
+///
+/// \returns true if all uses of \p DestAlloca were replaced with
+/// \p SrcAlloca. The copy itself is not erased here; it is left in place as
+/// a self-copy for the caller to remove.
+bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
+                                          AllocaInst *DestAlloca,
+                                          AllocaInst *SrcAlloca,
+                                          uint64_t Size) {
+  LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
+                    << *Store << "\n");
+
+  // Make sure the two allocas are in the same address space.
+  if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
+    LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
+    return false;
+  }
+
+  // 1. Check that the copy covers both allocas entirely.
+
+  // Calculate the static size of the allocas to be merged, bailing out if we
+  // can't.
+  const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
+  std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
+  if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
+    LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
+    return false;
+  }
+  std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
+  if (!DestSize || DestSize->isScalable() ||
+      Size != DestSize->getFixedValue()) {
+    LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
+    return false;
+  }
+
+  // 2. Check that neither the src nor the dest alloca is captured.
+  if (llvm::PointerMayBeCaptured(DestAlloca, /* ReturnCaptures=*/true,
+                                 /* StoreCaptures= */ true) ||
+      llvm::PointerMayBeCaptured(SrcAlloca, /* ReturnCaptures=*/true,
+                                 /* StoreCaptures= */ true))
+    return false;
+
+  // 3. The src has no read and no write except lifetime intrinsics, from after
+  // the full copy to the end of the BB.
+  // NOTE(review): only direct operand uses in the block containing Store are
+  // inspected; uses in other blocks or through derived pointers are not --
+  // confirm this is sufficient.
+  for (auto It = ++Store->getIterator(), E = Store->getParent()->end(); It != E;
+       ++It) {
+    llvm::Instruction &I = *It;
+    for (auto &V : I.operands()) {
+      auto *AI = dyn_cast<AllocaInst>(V);
+      if (AI == SrcAlloca)
+        if (auto *II = dyn_cast<IntrinsicInst>(&I);
+            !(II && II->isLifetimeStartOrEnd()))
+          return false;
+    }
+  }
+
+  // We can do the transformation. First, align the allocas appropriately.
+  SrcAlloca->setAlignment(
+      std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
+
+  // Merge the two allocas.
+  DestAlloca->replaceAllUsesWith(SrcAlloca);
+
+  // Drop metadata on the source alloca.
+  SrcAlloca->dropUnknownNonDebugMetadata();
+
+  // TODO: remove all noalias metadata from src alloca. or not ? Does this
+  // change the expectation if the noalias attributes are there?
+
+  LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
+  ++NumStackMove;
+  // The IR has been modified: report success so that callers erase the
+  // now-redundant copy and report the change.
+  return true;
+}
 /// Perform simplification of memcpy's. If we have memcpy A
 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
@@ -1488,8 +1591,10 @@
         }
       }
     }
-    if (auto *MDep = dyn_cast<MemCpyInst>(MI))
-      return processMemCpyMemCpyDependence(M, MDep, BAA);
+    if (auto *MDep = dyn_cast<MemCpyInst>(MI)) {
+      if (processMemCpyMemCpyDependence(M, MDep, BAA))
+        return true;
+    }
     if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
       if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
         LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
@@ -1508,6 +1613,26 @@
     }
   }
+  // If the transfer is from a stack slot to a stack slot, then we may be able
+  // to perform the stack-move optimization. See the comments in
+  // performStackMoveOptzn() for more details.
+  AllocaInst *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
+  if (DestAlloca == nullptr)
+    return false;
+  AllocaInst *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
+  if (SrcAlloca == nullptr)
+    return false;
+  ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
+  if (Len == nullptr)
+    return false;
+  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue())) {
+    // Avoid invalidating the iterator.
+ BBI = M->getNextNonDebugInstruction()->getIterator(); + eraseInstruction(M); + ++NumMemCpyInstr; + return true; + } + return false; } Index: llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll =================================================================== --- llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll +++ llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll @@ -1,11 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt < %s -S -passes=memcpyopt | FileCheck --match-full-lines %s ; Alias scopes are merged by taking the intersection of domains, then the union of the scopes within those domains define i8 @test(i8 %input) { +; CHECK-LABEL: define i8 @test +; CHECK-SAME: (i8 [[INPUT:%.*]]) { +; CHECK-NEXT: [[TMP:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[DST:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[SRC]]), !noalias !0 +; CHECK-NEXT: store i8 [[INPUT]], ptr [[SRC]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[SRC]], ptr align 8 [[SRC]], i64 1, i1 false), !alias.scope !5 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[SRC]]), !noalias !0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[SRC]], ptr align 8 [[SRC]], i64 1, i1 false), !alias.scope !0 +; CHECK-NEXT: [[RET_VALUE:%.*]] = load i8, ptr [[SRC]], align 1 +; CHECK-NEXT: ret i8 [[RET_VALUE]] +; %tmp = alloca i8 %dst = alloca i8 %src = alloca i8 -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %src, i64 1, i1 false), !alias.scope ![[SCOPE:[0-9]+]] call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %src), !noalias !4 store i8 %input, ptr %src call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false), !alias.scope !0 @@ -16,9 +29,6 @@ } ; Merged scope contains "callee0: %a" and "callee0 : %b" -; CHECK-DAG: 
![[CALLEE0_A:[0-9]+]] = distinct !{!{{[0-9]+}}, !{{[0-9]+}}, !"callee0: %a"} -; CHECK-DAG: ![[CALLEE0_B:[0-9]+]] = distinct !{!{{[0-9]+}}, !{{[0-9]+}}, !"callee0: %b"} -; CHECK-DAG: ![[SCOPE]] = !{![[CALLEE0_A]], ![[CALLEE0_B]]} declare void @llvm.lifetime.start.p0(i64, ptr nocapture) declare void @llvm.lifetime.end.p0(i64, ptr nocapture) Index: llvm/test/Transforms/MemCpyOpt/callslot.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/callslot.ll +++ llvm/test/Transforms/MemCpyOpt/callslot.ll @@ -60,7 +60,7 @@ ; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 16, i1 false) ; CHECK-NEXT: store i8 1, ptr [[SRC]], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST]], ptr [[SRC]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[SRC]], ptr [[SRC]], i64 16, i1 false) ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] Index: llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll +++ llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt < %s -S -passes=memcpyopt | FileCheck --match-full-lines %s ; Make sure callslot optimization merges alias.scope metadata correctly when it merges instructions. @@ -13,11 +14,23 @@ ; !5 = distinct !{!5, !"callee0"} ; Which is incorrect because the lifetime.end of %src will now "noalias" the above memcpy. 
define i8 @test(i8 %input) { +; CHECK-LABEL: define i8 @test +; CHECK-SAME: (i8 [[INPUT:%.*]]) { +; CHECK-NEXT: [[TMP:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[DST:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[SRC]]), !noalias !0 +; CHECK-NEXT: store i8 [[INPUT]], ptr [[SRC]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[SRC]], ptr align 8 [[SRC]], i64 1, i1 false), !alias.scope !3 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[SRC]]), !noalias !0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[SRC]], ptr align 8 [[SRC]], i64 1, i1 false), !alias.scope !0 +; CHECK-NEXT: [[RET_VALUE:%.*]] = load i8, ptr [[SRC]], align 1 +; CHECK-NEXT: ret i8 [[RET_VALUE]] +; %tmp = alloca i8 %dst = alloca i8 %src = alloca i8 ; NOTE: we're matching the full line and looking for the lack of !alias.scope here -; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %src, i64 1, i1 false) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %src), !noalias !3 store i8 %input, ptr %src call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false), !alias.scope !0 Index: llvm/test/Transforms/MemCpyOpt/memcpy.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -599,6 +599,7 @@ ; CHECK-LABEL: @immut_param_enforced_alignment( ; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 4 ; CHECK-NEXT: store i32 42, ptr [[VAL]], align 4 +; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 ; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL]]) ; CHECK-NEXT: ret void ; @@ -647,6 +648,7 @@ ; CHECK-LABEL: @immut_unescaped_alloca( ; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 4 ; CHECK-NEXT: store i32 42, ptr [[VAL]], align 4 +; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 ; CHECK-NEXT: call void 
@f_full_readonly(ptr [[VAL]]) ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/MemCpyOpt/stack-move.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/stack-move.ll +++ llvm/test/Transforms/MemCpyOpt/stack-move.ll @@ -26,13 +26,13 @@ ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SRC]], ptr align 4 @constant, i64 12, i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SRC]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -54,13 +54,12 @@ ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SRC]], ptr align 4 
@constant, i64 12, i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -82,14 +81,14 @@ ; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]]) ; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SRC]], align 4 -; CHECK-NEXT: store i32 [[TMP2]], ptr [[DEST]], align 4 +; CHECK-NEXT: store i32 [[TMP2]], ptr [[SRC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca i32, align 4 @@ -113,13 +112,13 @@ ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 
12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SRC]], ptr align 4 @constant, i64 12, i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SRC]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 8 @@ -142,18 +141,18 @@ ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SRC]], ptr align 4 @constant, i64 12, i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SRC]], ptr align 4 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr 
nocapture noundef [[DEST]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -174,19 +173,20 @@ ret void } +; TODO: remove metadata ; Tests that we remove scoped noalias metadata from a call. 
define void @remove_scoped_noalias() { ; CHECK-LABEL: define void @remove_scoped_noalias() { ; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SRC]], ptr align 4 @constant, i64 12, i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]), !alias.scope !0 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SRC]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]), !noalias !0 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]), !noalias !0 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4 @@ -205,16 +205,16 @@ ; Tests that we remove metadata on the merged alloca. 
define void @remove_alloca_metadata() { ; CHECK-LABEL: define void @remove_alloca_metadata() { -; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4, !annotation !3 +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SRC]], ptr align 4 @constant, i64 12, i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]), !alias.scope !0 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SRC]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]), !noalias !0 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]), !noalias !0 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) ; CHECK-NEXT: ret void ; %src = alloca %struct.Foo, align 4, !annotation !3 Index: llvm/test/Transforms/MemCpyOpt/stackrestore.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/stackrestore.ll +++ llvm/test/Transforms/MemCpyOpt/stackrestore.ll @@ -56,10 +56,10 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[ARGMEM]], ptr align 1 @str, i32 9, i1 false) ; CHECK-NEXT: [[P10:%.*]] = getelementptr inbounds [10 x i8], ptr [[ARGMEM]], i32 0, i32 9 ; CHECK-NEXT: store i8 0, ptr [[P10]], align 1 -; CHECK-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr [[TMPMEM]], ptr [[ARGMEM]], i32 10, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[ARGMEM]], ptr [[ARGMEM]], i32 10, i1 false) ; CHECK-NEXT: call void @llvm.stackrestore(ptr [[INALLOCA_SAVE]]) ; CHECK-NEXT: [[HEAP:%.*]] = call ptr @malloc(i32 9) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[HEAP]], ptr [[TMPMEM]], i32 9, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[HEAP]], ptr [[ARGMEM]], i32 9, i1 false) ; CHECK-NEXT: call void @useit(ptr [[HEAP]]) ; CHECK-NEXT: ret i32 0 ;