Index: llvm/test/Transforms/MemCpyOpt/stack-move.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/stack-move.ll +++ llvm/test/Transforms/MemCpyOpt/stack-move.ll @@ -7,6 +7,7 @@ declare void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) declare void @llvm.memcpy.p2.p1.i64(ptr addrspace(2) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) declare void @llvm.lifetime.start.p0(i64, ptr nocapture) declare void @llvm.lifetime.end.p0(i64, ptr nocapture) @@ -444,8 +445,7 @@ ret void } - -; TODO: merge allocas for bb-separated, but logically straight +; TODO: Merge alloca define void @multi_bb_load_store(i1 %b) { ; CHECK-LABEL: define void @multi_bb_load_store ; CHECK-SAME: (i1 [[B:%.*]]) { @@ -456,6 +456,44 @@ ; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) ; CHECK-NEXT: [[SRC_VAL:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: store i32 [[SRC_VAL]], ptr [[DEST]], align 4 +; CHECK-NEXT: br label [[BB0:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: ret void +; + %src = alloca i32, align 4 + %dest = alloca i32, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nocapture %src) + call void @llvm.lifetime.start.p0(i64 4, ptr nocapture %dest) + store i32 42, ptr %src + %1 = call i32 @use_nocapture(ptr nocapture %src) + + %src.val = load i32, ptr %src + store i32 %src.val, ptr %dest + br label %bb0 + +bb0: + %2 = call i32 @use_nocapture(ptr nocapture %dest) + call void @llvm.lifetime.end.p0(i64 4, ptr nocapture %src) + call void @llvm.lifetime.end.p0(i64 4, ptr nocapture %dest) + ret void +} + +; FIXME: merge allocas for bb-separated, but logically straight. +; We might be handle those load/store MemCpyOpt totally +define void @multi_bb_separated_load_store(i1 %b) { +; CHECK-LABEL: define void @multi_bb_separated_load_store +; CHECK-SAME: (i1 [[B:%.*]]) { +; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]) +; CHECK-NEXT: [[SRC_VAL:%.*]] = load i32, ptr [[SRC]], align 4 ; CHECK-NEXT: br label [[BB0:%.*]] ; CHECK: bb0: ; CHECK-NEXT: store i32 [[SRC_VAL]], ptr [[DEST]], align 4 @@ -534,8 +572,53 @@ ret void } -define ptr @multi_bb_loop(i32 %n) { -; CHECK-LABEL: define ptr @multi_bb_loop +; Test for BasicBlock and Instruction mixed post-dominator finding. +define void @multi_bb_pdom_test0(i1 %b) { +; CHECK-LABEL: define void @multi_bb_pdom_test0 +; CHECK-SAME: (i1 [[B:%.*]]) { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dest = alloca %struct.Foo, align 4 + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest) + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false); 1 + br i1 %b, label %bb0, label %bb1 + +bb0: + %1 = call i32 @use_nocapture(ptr noundef nocapture %dest) + br label %bb2 + +bb1: + %2 = call i32 @use_nocapture(ptr noundef nocapture %dest) + br label %bb2 + +bb2: + %3 = call i32 @use_nocapture(ptr noundef nocapture %dest) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest) + ret void + + uselistorder ptr %dest, { 2, 3, 0, 1, 4, 5 } +} + +; TODO: merge allocas for multi basicblock loop case. +define void @multi_bb_loop(i32 %n) { +; CHECK-LABEL: define void @multi_bb_loop ; CHECK-SAME: (i32 [[N:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[NLT1:%.*]] = icmp slt i32 [[N]], 1 @@ -549,11 +632,11 @@ ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[NEW_I:%.*]], [[LOOP_BODY]] ], [ 1, [[ENTRY:%.*]] ] ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DEST]], ptr align 8 [[SRC]], i64 12, i1 false) ; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], 1 -; CHECK-NEXT: store i32 [[NEW_I]], ptr [[DEST]], align 4 +; CHECK-NEXT: store i32 [[NEW_I]], ptr [[SRC]], align 4 ; CHECK-NEXT: [[IGTN:%.*]] = icmp sgt i32 [[NEW_I]], [[N]] ; CHECK-NEXT: br i1 [[IGTN]], label [[LOOP_EXIT]], label [[LOOP_BODY]] ; CHECK: loop_exit: -; CHECK-NEXT: ret ptr [[DEST]] +; CHECK-NEXT: ret void ; entry: %nlt1 = icmp slt i32 %n, 1 @@ -568,16 +651,266 @@ %i = phi i32 [ %new_i, %loop_body ], [ 1, %entry ] call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 12, i1 false) %new_i = add i32 %i, 1 - store i32 %new_i, ptr %dest + store i32 %new_i, ptr %src %igtn = icmp sgt i32 %new_i, %n br i1 %igtn, label %loop_exit, label %loop_body loop_exit: - ret ptr %dest + ret void +} + +; TODO: merge allocas for multi basicblocks, s.t. some modref which is unreachable from copy exists. +define void @multi_bb_unreachable_modref(i1 %b0) { +; CHECK-LABEL: define void @multi_bb_unreachable_modref +; CHECK-SAME: (i1 [[B0:%.*]]) { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: ret void +; CHECK: bb0: +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dest = alloca %struct.Foo, align 4 + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest) + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + %1 = call i32 @use_nocapture(ptr noundef nocapture %src) + br i1 %b0, label %bb0, label %exit + +exit: + %2 = call i32 @use_nocapture(ptr noundef nocapture %src) + ret void + +bb0: + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest) + ret void +} + +; TODO: merge allocas for multi basicblocks, s.t. memcpy doesn't dominate the uses. +define void @multi_bb_non_dominated(i1 %b0, i1 %b1) { +; CHECK-LABEL: define void @multi_bb_non_dominated +; CHECK-SAME: (i1 [[B0:%.*]], i1 [[B1:%.*]]) { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: br i1 [[B0]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: bb0: +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dest = alloca %struct.Foo, align 4 + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest) + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + %1 = call i32 @use_nocapture(ptr noundef nocapture %src) + br i1 %b0, label %bb0, label %bb1 + +bb0: + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false) + br label %bb2 + +bb1: + %2 = call i32 @use_nocapture(ptr noundef nocapture %src) + ; %3 = call i32 @use_writeonly(ptr noundef nocapture %dest) + br label %bb2 + +bb2: + ; %4 = call i32 @use_nocapture(ptr noundef nocapture %src) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest) + ret void +} + +; TODO: to merge following `is_def` cases, we need to do liveness analysis +; or something that distinguish the full-size-Mod as a Def. +; Tests that a memcpy that completely overwrites a stack value is a definition +; for the purposes of liveness analysis. +define void @memcpy_is_def() { +; CHECK-LABEL: define void @memcpy_is_def() { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SRC]], ptr align 4 [[DEST]], i64 12, i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dest = alloca %struct.Foo, align 4 + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest) + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + %1 = call i32 @use_nocapture(ptr noundef nocapture %src) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false) + %2 = call i32 @use_nocapture(ptr noundef nocapture %dest) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %src, ptr align 4 %dest, i64 12, i1 false) + %3 = call i32 @use_nocapture(ptr noundef nocapture %src) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest) + ret void +} + +; TODO: merge allocas +; Tests that a memset that completely overwrites a stack value is a definition +; for the purposes of liveness analysis. +define void @memset_is_def() { +; CHECK-LABEL: define void @memset_is_def() { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[SRC]], i8 42, i64 12, i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dest = alloca %struct.Foo, align 4 + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest) + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + %1 = call i32 @use_nocapture(ptr noundef nocapture %src) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false) + %2 = call i32 @use_nocapture(ptr noundef nocapture %dest) + call void @llvm.memset.p0.i64(ptr align 4 %src, i8 42, i64 12, i1 false) + %3 = call i32 @use_nocapture(ptr noundef nocapture %src) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest) + ret void +} + +; TODO: merge allocas +; Tests that a store that completely overwrites a stack value is a definition +; for the purposes of liveness analysis. +define void @store_is_def() { +; CHECK-LABEL: define void @store_is_def() { +; CHECK-NEXT: [[SRC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[DEST:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: store i32 42, ptr [[SRC]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: store i32 [[TMP2]], ptr [[DEST]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: store i32 64, ptr [[SRC]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]]) +; CHECK-NEXT: ret void +; + %src = alloca i32, align 4 + %dest = alloca i32, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nocapture %src) + call void @llvm.lifetime.start.p0(i64 4, ptr nocapture %dest) + store i32 42, ptr %src + %1 = call i32 @use_nocapture(ptr noundef nocapture %src) + %2 = load i32, ptr %src + store i32 %2, ptr %dest + %3 = call i32 @use_nocapture(ptr noundef nocapture %dest) + store i32 64, ptr %src + %4 = call i32 @use_nocapture(ptr noundef nocapture %src) + call void @llvm.lifetime.end.p0(i64 4, ptr nocapture %src) + call void @llvm.lifetime.end.p0(i64 4, ptr nocapture %dest) + ret void } ; Optimization failures follow: +; Tests that a memcpy that doesn't completely overwrite a stack value is a use +; for the purposes of liveness analysis, not a definition. +define void @incomplete_memcpy() { +; CHECK-LABEL: define void @incomplete_memcpy() { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 11, i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dest = alloca %struct.Foo, align 4 + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest) + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + %1 = call i32 @use_nocapture(ptr noundef nocapture %src) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 11, i1 false) + %2 = call i32 @use_nocapture(ptr noundef nocapture %dest) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest) + ret void +} + +; Tests that a store that doesn't completely overwrite a stack value is a use +; for the purposes of liveness analysis, not a definition. +define void @incomplete_store() { +; CHECK-LABEL: define void @incomplete_store() { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: store i32 [[TMP2]], ptr [[DEST]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dest = alloca %struct.Foo, align 4 + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest) + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + %1 = call i32 @use_nocapture(ptr noundef nocapture %src) + %2 = load i32, ptr %src + store i32 %2, ptr %dest + %3 = call i32 @use_nocapture(ptr noundef nocapture %dest) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest) + ret void +} + ; Tests that dynamically-sized allocas are never merged. define void @dynamically_sized_alloca(i64 %i) { ; CHECK-LABEL: define void @dynamically_sized_alloca @@ -1050,6 +1383,96 @@ ret void } +; Tests that the optimization isn't performed when the source and destination +; have mod ref conflict. +define void @multi_bb_mod_ref_conflict(i1 %b) { +; CHECK-LABEL: define void @multi_bb_mod_ref_conflict +; CHECK-SAME: (i1 [[B:%.*]]) { +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]]) +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]]) +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: ret void +; + %src = alloca %struct.Foo, align 4 + %dest = alloca %struct.Foo, align 4 + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest) + store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src + %1 = call i32 @use_nocapture(ptr noundef nocapture %src) + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false) + br i1 %b, label %bb0, label %bb1 + +bb0: + %2 = call i32 @use_nocapture(ptr noundef nocapture %src) + br label %bb2 + +bb1: + %3 = call i32 @use_nocapture(ptr noundef nocapture %dest) + br label %bb2 + +bb2: + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest) + ret void +} + +; Tests that failure because after copy mod could be before copy on loop. +define void @multi_bb_loop_dest_mod_before_copy(i32 %n) { +; CHECK-LABEL: define void @multi_bb_loop_dest_mod_before_copy +; CHECK-SAME: (i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NLT1:%.*]] = icmp slt i32 [[N]], 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8 +; CHECK-NEXT: [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]]) +; CHECK-NEXT: store [[STRUCT_FOO]] { i32 0, i32 1, i32 42 }, ptr [[SRC]], align 4 +; CHECK-NEXT: br i1 [[NLT1]], label [[LOOP_EXIT:%.*]], label [[LOOP_BODY:%.*]] +; CHECK: loop_body: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[NEW_I:%.*]], [[LOOP_BODY]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DEST]], ptr align 8 [[SRC]], i64 12, i1 false) +; CHECK-NEXT: [[NEW_I]] = add i32 [[I]], 1 +; CHECK-NEXT: store i32 [[NEW_I]], ptr [[DEST]], align 4 +; CHECK-NEXT: [[IGTN:%.*]] = icmp sgt i32 [[NEW_I]], [[N]] +; CHECK-NEXT: br i1 [[IGTN]], label [[LOOP_EXIT]], label [[LOOP_BODY]] +; CHECK: loop_exit: +; CHECK-NEXT: ret void +; +entry: + %nlt1 = icmp slt i32 %n, 1 + %src = alloca %struct.Foo, align 8 + %dest = alloca %struct.Foo, align 8 + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src) + call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest) + store %struct.Foo { i32 0, i32 1, i32 42 }, ptr %src + br i1 %nlt1, label %loop_exit, label %loop_body + +loop_body: + %i = phi i32 [ %new_i, %loop_body ], [ 1, %entry ] + call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 12, i1 false) + %new_i = add i32 %i, 1 + store i32 %new_i, ptr %dest + %igtn = icmp sgt i32 %new_i, %n + br i1 %igtn, label %loop_exit, label %loop_body + +loop_exit: + ret void +} + ; Tests that failure because partial-sized lifetimes are counted as mod. define void @partial_lifetime() { ; CHECK-LABEL: define void @partial_lifetime() {