Index: test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll =================================================================== --- test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll +++ test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -tbaa -basicaa -memcpyopt -instcombine < %s | FileCheck %s target datalayout = "e-p:64:64:64" @@ -5,11 +6,12 @@ ; The second memcpy is redundant and can be deleted. There's an intervening store, but ; it has a TBAA tag which declares that it is unrelated. -; CHECK: @foo -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !0 -; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa [[TAGA:!.*]] -; CHECK-NEXT: ret void define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind { +; CHECK-LABEL: @foo( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !0 +; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa !3 +; CHECK-NEXT: ret void +; tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !2 store i8 2, i8* %s, align 1, !tbaa !1 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %p, i64 16, i32 1, i1 false), !tbaa !2 @@ -18,8 +20,6 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind -; CHECK: [[TAGA]] = !{[[TYPEA:!.*]], [[TYPEA]], i64 0} -; CHECK: [[TYPEA]] = !{!"A", !{{.*}}} !0 = !{!"tbaa root", null} !1 = !{!3, !3, i64 0} !2 = !{!4, !4, i64 0} Index: test/Transforms/GVN/pr24426.ll =================================================================== --- test/Transforms/GVN/pr24426.ll +++ test/Transforms/GVN/pr24426.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -mldst-motion -gvn -S | FileCheck %s declare void @check(i8) @@ -5,12 +6,19 @@ declare void @write(i8* %res) 
define void @test1() { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[TMP1:%.*]] = alloca [10 x i8] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast [10 x i8]* [[TMP1]] to i8* +; CHECK-NEXT: call void @write(i8* [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]] +; CHECK-NEXT: call void @check(i8 [[TMP3]]) +; CHECK-NEXT: ret void +; %1 = alloca [10 x i8] %2 = bitcast [10 x i8]* %1 to i8* call void @write(i8* %2) %3 = load i8, i8* %2 -; CHECK-NOT: undef call void @check(i8 %3) ret void Index: test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll =================================================================== --- test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll +++ test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -dse -S | grep "call.*initialize" | not grep memtmp ; PR2077 Index: test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll =================================================================== --- test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll +++ test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -S | not grep "call.*memcpy." 
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" Index: test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll =================================================================== --- test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll +++ test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s ; PR10067 ; Make sure the call+copy isn't optimized in such a way that @@ -12,10 +13,25 @@ declare void @bar(%struct1* nocapture sret %agg.result) nounwind define i32 @foo() nounwind { +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[X:%.*]] = alloca %struct1, align 8 +; CHECK-NEXT: [[Y:%.*]] = alloca %struct2, align 8 +; CHECK-NEXT: call void @bar(%struct1* sret [[X]]) #0 +; CHECK-NEXT: [[GEPN1:%.*]] = getelementptr inbounds %struct2, %struct2* [[Y]], i32 0, i32 0, i32 0 +; CHECK-NEXT: store i32 0, i32* [[GEPN1]], align 8 +; CHECK-NEXT: [[GEPN2:%.*]] = getelementptr inbounds %struct2, %struct2* [[Y]], i32 0, i32 0, i32 1 +; CHECK-NEXT: store i32 0, i32* [[GEPN2]], align 4 +; CHECK-NEXT: [[BIT1:%.*]] = bitcast %struct1* [[X]] to i64* +; CHECK-NEXT: [[BIT2:%.*]] = bitcast %struct2* [[Y]] to i64* +; CHECK-NEXT: [[LOAD:%.*]] = load i64, i64* [[BIT1]], align 8 +; CHECK-NEXT: store i64 [[LOAD]], i64* [[BIT2]], align 8 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr %struct2, %struct2* [[Y]], i32 0, i32 0, i32 0 +; CHECK-NEXT: [[RET:%.*]] = load i32, i32* [[GEP1]] +; CHECK-NEXT: ret i32 [[RET]] +; %x = alloca %struct1, align 8 %y = alloca %struct2, align 8 call void @bar(%struct1* sret %x) nounwind -; CHECK: call void @bar(%struct1* sret %x) %gepn1 = getelementptr inbounds %struct2, %struct2* %y, i32 0, i32 0, i32 0 store i32 0, i32* %gepn1, align 8 @@ -27,8 +43,6 @@ %load = load i64, i64* %bit1, align 8 store i64 %load, i64* %bit2, align 8 -; CHECK: %load 
= load i64, i64* %bit1, align 8 -; CHECK: store i64 %load, i64* %bit2, align 8 %gep1 = getelementptr %struct2, %struct2* %y, i32 0, i32 0, i32 0 %ret = load i32, i32* %gep1 Index: test/Transforms/MemCpyOpt/align.ll =================================================================== --- test/Transforms/MemCpyOpt/align.ll +++ test/Transforms/MemCpyOpt/align.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" @@ -9,7 +10,14 @@ define void @foo(i32* %p) { ; CHECK-LABEL: @foo( -; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false) +; CHECK-NEXT: [[A0:%.*]] = getelementptr i32, i32* %p, i64 0 +; CHECK-NEXT: [[A1:%.*]] = getelementptr i32, i32* %p, i64 1 +; CHECK-NEXT: [[A2:%.*]] = getelementptr i32, i32* %p, i64 2 +; CHECK-NEXT: [[A3:%.*]] = getelementptr i32, i32* %p, i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 16, i32 4, i1 false) +; CHECK-NEXT: ret void +; %a0 = getelementptr i32, i32* %p, i64 0 store i32 0, i32* %a0, align 4 %a1 = getelementptr i32, i32* %p, i64 1 @@ -25,8 +33,14 @@ define void @bar() { ; CHECK-LABEL: @bar( -; CHECK: %a4 = alloca i32, align 8 -; CHECK-NOT: memcpy +; CHECK-NEXT: [[A4:%.*]] = alloca i32, align 8 +; CHECK-NEXT: [[A8:%.*]] = alloca i32, align 8 +; CHECK-NEXT: [[A8_CAST:%.*]] = bitcast i32* [[A8]] to i8* +; CHECK-NEXT: [[A4_CAST:%.*]] = bitcast i32* [[A4]] to i8* +; CHECK-NEXT: [[A41:%.*]] = bitcast i32* [[A4]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A41]], i8 0, i64 4, i32 8, i1 false) +; CHECK-NEXT: ret void +; %a4 = alloca i32, align 4 %a8 = alloca i32, align 8 %a8.cast = bitcast i32* %a8 to i8* Index: test/Transforms/MemCpyOpt/atomic.ll 
=================================================================== --- test/Transforms/MemCpyOpt/atomic.ll +++ test/Transforms/MemCpyOpt/atomic.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basicaa -memcpyopt -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" @@ -11,8 +12,16 @@ ; memcpyopt should not touch atomic ops define void @test1() nounwind uwtable ssp { -; CHECK: test1 -; CHECK: store atomic +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[X:%.*]] = alloca [101 x i32], align 16 +; CHECK-NEXT: [[BC:%.*]] = bitcast [101 x i32]* [[X]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[BC]], i8 0, i64 400, i32 16, i1 false) +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [101 x i32], [101 x i32]* [[X]], i32 0, i32 100 +; CHECK-NEXT: store atomic i32 0, i32* [[GEP1]] unordered, align 4 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [101 x i32], [101 x i32]* [[X]], i32 0, i32 0 +; CHECK-NEXT: call void @otherf(i32* [[GEP2]]) +; CHECK-NEXT: ret void +; %x = alloca [101 x i32], align 16 %bc = bitcast [101 x i32]* %x to i8* call void @llvm.memset.p0i8.i64(i8* %bc, i8 0, i64 400, i32 16, i1 false) @@ -25,17 +34,21 @@ ; memcpyopt across unordered store define void @test2() nounwind uwtable ssp { -; CHECK: test2 -; CHECK: call -; CHECK-NEXT: store atomic -; CHECK-NEXT: call +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[OLD:%.*]] = alloca i32 +; CHECK-NEXT: [[NEW:%.*]] = alloca i32 +; CHECK-NEXT: call void @otherf(i32* nocapture [[NEW]]) +; CHECK-NEXT: store atomic i32 0, i32* @x unordered, align 4 +; CHECK-NEXT: call void @otherf(i32* nocapture [[NEW]]) +; CHECK-NEXT: ret void +; %old = alloca i32 %new = alloca i32 call void @otherf(i32* nocapture %old) store atomic i32 0, i32* @x unordered, align 4 %v = load i32, i32* %old store i32 %v, i32* %new - call void 
@otherf(i32* nocapture %new) + call void @otherf(i32* nocapture %new) ret void } Index: test/Transforms/MemCpyOpt/callslot_aa.ll =================================================================== --- test/Transforms/MemCpyOpt/callslot_aa.ll +++ test/Transforms/MemCpyOpt/callslot_aa.ll @@ -1,12 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %T = type { i64, i64 } define void @test(i8* %src) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[TMP:%.*]] = alloca i8 +; CHECK-NEXT: [[DST:%.*]] = alloca i8 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* %src, i64 1, i32 8, i1 false) +; CHECK-NEXT: ret void +; %tmp = alloca i8 %dst = alloca i8 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i32 8, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %src, i64 1, i32 8, i1 false), !noalias !2 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %tmp, i64 1, i32 8, i1 false) Index: test/Transforms/MemCpyOpt/callslot_deref.ll =================================================================== --- test/Transforms/MemCpyOpt/callslot_deref.ll +++ test/Transforms/MemCpyOpt/callslot_deref.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" @@ -7,8 +8,13 @@ ; all bytes of %dst that are touch by the memset are dereferenceable define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) { ; CHECK-LABEL: @must_remove_memcpy( -; CHECK: call void @llvm.memset.p0i8.i64 -; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NEXT: [[SRC:%.*]] = alloca [4096 x i8], align 1 +; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [4096 x i8], [4096 x i8]* [[SRC]], i64 0, i64 0 +; CHECK-NEXT: [[DST1:%.*]] = bitcast i8* %dst to [4096 
x i8]* +; CHECK-NEXT: [[DST12:%.*]] = bitcast [4096 x i8]* [[DST1]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST12]], i8 0, i64 4096, i32 1, i1 false) +; CHECK-NEXT: ret void +; %src = alloca [4096 x i8], align 1 %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0 call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false) @@ -20,8 +26,12 @@ ; We can't remove the memcpy, but we can turn it into an independent memset. define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) { ; CHECK-LABEL: @must_not_remove_memcpy( -; CHECK: call void @llvm.memset.p0i8.i64 -; CHECK: call void @llvm.memset.p0i8.i64 +; CHECK-NEXT: [[SRC:%.*]] = alloca [4096 x i8], align 1 +; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [4096 x i8], [4096 x i8]* [[SRC]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[P]], i8 0, i64 4096, i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 4096, i32 1, i1 false) +; CHECK-NEXT: ret void +; %src = alloca [4096 x i8], align 1 %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0 call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false) Index: test/Transforms/MemCpyOpt/callslot_throw.ll =================================================================== --- test/Transforms/MemCpyOpt/callslot_throw.ll +++ test/Transforms/MemCpyOpt/callslot_throw.ll @@ -1,34 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -memcpyopt < %s | FileCheck %s declare void @may_throw(i32* nocapture %x) -; CHECK-LABEL: define void @test1( define void @test1(i32* nocapture noalias dereferenceable(4) %x) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @may_throw(i32* nonnull [[T]]) +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[T]], align 4 +; CHECK-NEXT: store i32 [[LOAD]], i32* %x, align 4 +; 
CHECK-NEXT: ret void +; entry: %t = alloca i32, align 4 call void @may_throw(i32* nonnull %t) %load = load i32, i32* %t, align 4 store i32 %load, i32* %x, align 4 -; CHECK: %[[t:.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @may_throw(i32* {{.*}} %[[t]]) -; CHECK-NEXT: %[[load:.*]] = load i32, i32* %[[t]], align 4 -; CHECK-NEXT: store i32 %[[load]], i32* %x, align 4 ret void } declare void @always_throws() -; CHECK-LABEL: define void @test2( define void @test2(i32* nocapture noalias dereferenceable(4) %x) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @may_throw(i32* nonnull [[T]]) #0 +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[T]], align 4 +; CHECK-NEXT: call void @always_throws() +; CHECK-NEXT: store i32 [[LOAD]], i32* %x, align 4 +; CHECK-NEXT: ret void +; entry: %t = alloca i32, align 4 call void @may_throw(i32* nonnull %t) nounwind %load = load i32, i32* %t, align 4 call void @always_throws() store i32 %load, i32* %x, align 4 -; CHECK: %[[t:.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @may_throw(i32* {{.*}} %[[t]]) -; CHECK-NEXT: %[[load:.*]] = load i32, i32* %[[t]], align 4 -; CHECK-NEXT: call void @always_throws() -; CHECK-NEXT: store i32 %[[load]], i32* %x, align 4 ret void } Index: test/Transforms/MemCpyOpt/capturing-func.ll =================================================================== --- test/Transforms/MemCpyOpt/capturing-func.ll +++ test/Transforms/MemCpyOpt/capturing-func.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s target datalayout = "e" @@ -6,17 +7,20 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind define void @test() { +; Check that the transformation isn't applied if the called function can +; capture the pointer argument (i.e. 
the nocapture attribute isn't present) +; CHECK-LABEL: @test( +; CHECK-NEXT: [[PTR1:%.*]] = alloca i8 +; CHECK-NEXT: [[PTR2:%.*]] = alloca i8 +; CHECK-NEXT: call void @foo(i8* [[PTR2]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i32 1, i1 false) +; CHECK-NEXT: call void @foo(i8* [[PTR1]]) +; CHECK-NEXT: ret void +; %ptr1 = alloca i8 %ptr2 = alloca i8 call void @foo(i8* %ptr2) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i32 1, i1 false) call void @foo(i8* %ptr1) ret void - - ; Check that the transformation isn't applied if the called function can - ; capture the pointer argument (i.e. the nocapture attribute isn't present) - ; CHECK-LABEL: @test( - ; CHECK: call void @foo(i8* %ptr2) - ; CHECK-NEXT: call void @llvm.memcpy - ; CHECK-NEXT: call void @foo(i8* %ptr1) } Index: test/Transforms/MemCpyOpt/fca2memcpy.ll =================================================================== --- test/Transforms/MemCpyOpt/fca2memcpy.ll +++ test/Transforms/MemCpyOpt/fca2memcpy.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S < %s | FileCheck %s target datalayout = "e-i64:64-f80:128-n8:16:32:64" @@ -6,41 +7,49 @@ %S = type { i8*, i8, i32 } define void @copy(%S* %src, %S* %dst) { -; CHECK-LABEL: copy -; CHECK-NOT: load -; CHECK: call void @llvm.memmove.p0i8.p0i8.i64 -; CHECK-NEXT: ret void +; CHECK-LABEL: @copy( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* %dst to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* %src to i8* +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP2]], i64 16, i32 8, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S %1, %S* %dst ret void } define void @noaliassrc(%S* noalias %src, %S* %dst) { -; CHECK-LABEL: noaliassrc -; CHECK-NOT: load -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -; CHECK-NEXT: ret void +; CHECK-LABEL: @noaliassrc( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* %dst to 
i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* %src to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP2]], i64 16, i32 8, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S %1, %S* %dst ret void } define void @noaliasdst(%S* %src, %S* noalias %dst) { -; CHECK-LABEL: noaliasdst -; CHECK-NOT: load -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -; CHECK-NEXT: ret void +; CHECK-LABEL: @noaliasdst( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* %dst to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* %src to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP2]], i64 16, i32 8, i1 false) +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S %1, %S* %dst ret void } define void @destroysrc(%S* %src, %S* %dst) { -; CHECK-LABEL: destroysrc -; CHECK: load %S, %S* %src -; CHECK: call void @llvm.memset.p0i8.i64 -; CHECK-NEXT: store %S %1, %S* %dst -; CHECK-NEXT: ret void +; CHECK-LABEL: @destroysrc( +; CHECK-NEXT: [[TMP1:%.*]] = load %S, %S* %src +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* %src to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP2]], i8 0, i64 16, i32 8, i1 false) +; CHECK-NEXT: store %S [[TMP1]], %S* %dst +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S zeroinitializer, %S* %src store %S %1, %S* %dst @@ -48,11 +57,14 @@ } define void @destroynoaliassrc(%S* noalias %src, %S* %dst) { -; CHECK-LABEL: destroynoaliassrc -; CHECK-NOT: load -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64 -; CHECK-NEXT: ret void +; CHECK-LABEL: @destroynoaliassrc( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* %src to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* %dst to i8* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %S* %src to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 16, i32 8, i1 false) +; CHECK-NEXT: ret void +; %1 = 
load %S, %S* %src store %S zeroinitializer, %S* %src store %S %1, %S* %dst @@ -60,12 +72,14 @@ } define void @copyalias(%S* %src, %S* %dst) { -; CHECK-LABEL: copyalias -; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %S, %S* %src -; CHECK-NOT: load -; CHECK: call void @llvm.memmove.p0i8.p0i8.i64 -; CHECK-NEXT: store %S [[LOAD]], %S* %dst -; CHECK-NEXT: ret void +; CHECK-LABEL: @copyalias( +; CHECK-NEXT: [[TMP1:%.*]] = load %S, %S* %src +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* %dst to i8* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %S* %src to i8* +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) +; CHECK-NEXT: store %S [[TMP1]], %S* %dst +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src %2 = load %S, %S* %src store %S %1, %S* %dst @@ -76,11 +90,14 @@ ; If the store address is computed ina complex manner, make ; sure we lift the computation as well if needed and possible. define void @addrproducer(%S* %src, %S* %dst) { -; CHECK-LABEL: addrproducer -; CHECK: %dst2 = getelementptr %S, %S* %dst, i64 1 -; CHECK: call void @llvm.memmove.p0i8.p0i8.i64 -; CHECK-NEXT: store %S undef, %S* %dst -; CHECK-NEXT: ret void +; CHECK-LABEL: @addrproducer( +; CHECK-NEXT: [[DST2:%.*]] = getelementptr %S, %S* %dst, i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[DST2]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* %src to i8* +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP2]], i64 16, i32 8, i1 false) +; CHECK-NEXT: store %S undef, %S* %dst +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S undef, %S* %dst %dst2 = getelementptr %S , %S* %dst, i64 1 @@ -89,7 +106,14 @@ } define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) { -; CHECK-LABEL: aliasaddrproducer +; CHECK-LABEL: @aliasaddrproducer( +; CHECK-NEXT: [[TMP1:%.*]] = load %S, %S* %src +; CHECK-NEXT: store %S undef, %S* %dst +; CHECK-NEXT: [[DSTINDEX:%.*]] = load i32, i32* %dstidptr +; CHECK-NEXT: [[DST2:%.*]] = getelementptr %S, 
%S* %dst, i32 [[DSTINDEX]] +; CHECK-NEXT: store %S [[TMP1]], %S* [[DST2]] +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S undef, %S* %dst %dstindex = load i32, i32* %dstidptr @@ -99,7 +123,16 @@ } define void @noaliasaddrproducer(%S* %src, %S* noalias %dst, i32* noalias %dstidptr) { -; CHECK-LABEL: noaliasaddrproducer +; CHECK-LABEL: @noaliasaddrproducer( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* %dstidptr +; CHECK-NEXT: [[DSTINDEX:%.*]] = or i32 [[TMP1]], 1 +; CHECK-NEXT: [[DST2:%.*]] = getelementptr %S, %S* %dst, i32 [[DSTINDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[DST2]] to i8* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %S* %src to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) +; CHECK-NEXT: store %S undef, %S* %src +; CHECK-NEXT: ret void +; %1 = load %S, %S* %src store %S undef, %S* %src %2 = load i32, i32* %dstidptr Index: test/Transforms/MemCpyOpt/form-memset.ll =================================================================== --- test/Transforms/MemCpyOpt/form-memset.ll +++ test/Transforms/MemCpyOpt/form-memset.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | FileCheck %s ; All the stores in this example should be merged into a single memset. 
@@ -6,53 +7,74 @@ target triple = "i386-apple-darwin8" define void @test1(i8 signext %c) nounwind { -entry: - %x = alloca [19 x i8] ; <[19 x i8]*> [#uses=20] - %tmp = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 0 ; [#uses=1] - store i8 %c, i8* %tmp, align 1 - %tmp5 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 1 ; [#uses=1] - store i8 %c, i8* %tmp5, align 1 - %tmp9 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 2 ; [#uses=1] - store i8 %c, i8* %tmp9, align 1 - %tmp13 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 3 ; [#uses=1] - store i8 %c, i8* %tmp13, align 1 - %tmp17 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 4 ; [#uses=1] - store i8 %c, i8* %tmp17, align 1 - %tmp21 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 5 ; [#uses=1] - store i8 %c, i8* %tmp21, align 1 - %tmp25 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 6 ; [#uses=1] - store i8 %c, i8* %tmp25, align 1 - %tmp29 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 7 ; [#uses=1] - store i8 %c, i8* %tmp29, align 1 - %tmp33 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 8 ; [#uses=1] - store i8 %c, i8* %tmp33, align 1 - %tmp37 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 9 ; [#uses=1] - store i8 %c, i8* %tmp37, align 1 - %tmp41 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 10 ; [#uses=1] - store i8 %c, i8* %tmp41, align 1 - %tmp45 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 11 ; [#uses=1] - store i8 %c, i8* %tmp45, align 1 - %tmp49 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 12 ; [#uses=1] - store i8 %c, i8* %tmp49, align 1 - %tmp53 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 13 ; [#uses=1] - store i8 %c, i8* %tmp53, align 1 - %tmp57 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 14 ; [#uses=1] - store i8 %c, i8* %tmp57, align 1 - %tmp61 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 15 ; [#uses=1] - store i8 %c, i8* %tmp61, align 1 - %tmp65 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 16 ; 
[#uses=1] - store i8 %c, i8* %tmp65, align 1 - %tmp69 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 17 ; [#uses=1] - store i8 %c, i8* %tmp69, align 1 - %tmp73 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 18 ; [#uses=1] - store i8 %c, i8* %tmp73, align 1 - %tmp76 = call i32 (...) @bar( [19 x i8]* %x ) nounwind - ret void ; CHECK-LABEL: @test1( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64 -; CHECK-NOT: store -; CHECK: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = alloca [19 x i8] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 4 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 5 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 6 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 7 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 8 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 9 +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 10 +; CHECK-NEXT: [[TMP45:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 11 +; CHECK-NEXT: [[TMP49:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 12 +; CHECK-NEXT: [[TMP53:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 13 +; CHECK-NEXT: [[TMP57:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 14 +; CHECK-NEXT: [[TMP61:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 15 +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, 
i32 16 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 17 +; CHECK-NEXT: [[TMP73:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 18 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP]], i8 %c, i64 19, i32 1, i1 false) +; CHECK-NEXT: [[TMP76:%.*]] = call i32 (...) @bar([19 x i8]* [[X]]) #0 +; CHECK-NEXT: ret void +; +entry: + %x = alloca [19 x i8] ; <[19 x i8]*> [#uses=20] + %tmp = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 0 ; [#uses=1] + store i8 %c, i8* %tmp, align 1 + %tmp5 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 1 ; [#uses=1] + store i8 %c, i8* %tmp5, align 1 + %tmp9 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 2 ; [#uses=1] + store i8 %c, i8* %tmp9, align 1 + %tmp13 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 3 ; [#uses=1] + store i8 %c, i8* %tmp13, align 1 + %tmp17 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 4 ; [#uses=1] + store i8 %c, i8* %tmp17, align 1 + %tmp21 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 5 ; [#uses=1] + store i8 %c, i8* %tmp21, align 1 + %tmp25 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 6 ; [#uses=1] + store i8 %c, i8* %tmp25, align 1 + %tmp29 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 7 ; [#uses=1] + store i8 %c, i8* %tmp29, align 1 + %tmp33 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 8 ; [#uses=1] + store i8 %c, i8* %tmp33, align 1 + %tmp37 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 9 ; [#uses=1] + store i8 %c, i8* %tmp37, align 1 + %tmp41 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 10 ; [#uses=1] + store i8 %c, i8* %tmp41, align 1 + %tmp45 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 11 ; [#uses=1] + store i8 %c, i8* %tmp45, align 1 + %tmp49 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 12 ; [#uses=1] + store i8 %c, i8* %tmp49, align 1 + %tmp53 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 13 ; [#uses=1] + store i8 %c, i8* %tmp53, align 1 + %tmp57 = 
getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 14 ; [#uses=1] + store i8 %c, i8* %tmp57, align 1 + %tmp61 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 15 ; [#uses=1] + store i8 %c, i8* %tmp61, align 1 + %tmp65 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 16 ; [#uses=1] + store i8 %c, i8* %tmp65, align 1 + %tmp69 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 17 ; [#uses=1] + store i8 %c, i8* %tmp69, align 1 + %tmp73 = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 18 ; [#uses=1] + store i8 %c, i8* %tmp73, align 1 + %tmp76 = call i32 (...) @bar( [19 x i8]* %x ) nounwind + ret void } declare i32 @bar(...) @@ -61,104 +83,150 @@ define void @test2() nounwind { -entry: - %ref_idx = alloca [8 x i8] ; <[8 x i8]*> [#uses=8] - %left_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] - %up_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] - %tmp20 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 7 ; [#uses=1] - store i8 -1, i8* %tmp20, align 1 - %tmp23 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 6 ; [#uses=1] - store i8 -1, i8* %tmp23, align 1 - %tmp26 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 5 ; [#uses=1] - store i8 -1, i8* %tmp26, align 1 - %tmp29 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 4 ; [#uses=1] - store i8 -1, i8* %tmp29, align 1 - %tmp32 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 3 ; [#uses=1] - store i8 -1, i8* %tmp32, align 1 - %tmp35 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 2 ; [#uses=1] - store i8 -1, i8* %tmp35, align 1 - %tmp38 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 1 ; [#uses=1] - store i8 -1, i8* %tmp38, align 1 - %tmp41 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 0 ; [#uses=2] - store i8 -1, i8* %tmp41, align 1 - %tmp43 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0 ; [#uses=1] - store i16 0, i16* %tmp43, align 2 - %tmp46 = getelementptr [8 x %struct.MV], [8 
x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1 ; [#uses=1] - store i16 0, i16* %tmp46, align 2 - %tmp57 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0 ; [#uses=1] - store i16 0, i16* %tmp57, align 2 - %tmp60 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1 ; [#uses=1] - store i16 0, i16* %tmp60, align 2 - %tmp71 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0 ; [#uses=1] - store i16 0, i16* %tmp71, align 2 - %tmp74 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1 ; [#uses=1] - store i16 0, i16* %tmp74, align 2 - %tmp85 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0 ; [#uses=1] - store i16 0, i16* %tmp85, align 2 - %tmp88 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1 ; [#uses=1] - store i16 0, i16* %tmp88, align 2 - %tmp99 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0 ; [#uses=1] - store i16 0, i16* %tmp99, align 2 - %tmp102 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1 ; [#uses=1] - store i16 0, i16* %tmp102, align 2 - %tmp113 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0 ; [#uses=1] - store i16 0, i16* %tmp113, align 2 - %tmp116 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1 ; [#uses=1] - store i16 0, i16* %tmp116, align 2 - %tmp127 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0 ; [#uses=1] - store i16 0, i16* %tmp127, align 2 - %tmp130 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1 ; [#uses=1] - store i16 0, i16* %tmp130, align 2 - %tmp141 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0 ; [#uses=1] - store i16 0, i16* %tmp141, align 8 - %tmp144 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1 ; 
[#uses=1] - store i16 0, i16* %tmp144, align 2 - %tmp148 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0 ; [#uses=1] - store i16 0, i16* %tmp148, align 2 - %tmp151 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 1 ; [#uses=1] - store i16 0, i16* %tmp151, align 2 - %tmp162 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0 ; [#uses=1] - store i16 0, i16* %tmp162, align 2 - %tmp165 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1 ; [#uses=1] - store i16 0, i16* %tmp165, align 2 - %tmp176 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0 ; [#uses=1] - store i16 0, i16* %tmp176, align 2 - %tmp179 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1 ; [#uses=1] - store i16 0, i16* %tmp179, align 2 - %tmp190 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0 ; [#uses=1] - store i16 0, i16* %tmp190, align 2 - %tmp193 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1 ; [#uses=1] - store i16 0, i16* %tmp193, align 2 - %tmp204 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0 ; [#uses=1] - store i16 0, i16* %tmp204, align 2 - %tmp207 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1 ; [#uses=1] - store i16 0, i16* %tmp207, align 2 - %tmp218 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0 ; [#uses=1] - store i16 0, i16* %tmp218, align 2 - %tmp221 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1 ; [#uses=1] - store i16 0, i16* %tmp221, align 2 - %tmp232 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0 ; [#uses=1] - store i16 0, i16* %tmp232, align 2 - %tmp235 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1 ; [#uses=1] - 
store i16 0, i16* %tmp235, align 2 - %tmp246 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0 ; [#uses=1] - store i16 0, i16* %tmp246, align 8 - %tmp249 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 1 ; [#uses=1] - store i16 0, i16* %tmp249, align 2 - %up_mvd252 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] - %left_mvd253 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] - call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind - ret void - ; CHECK-LABEL: @test2( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* %tmp41, i8 -1, i64 8, i32 1, i1 false) -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 32, i32 8, i1 false) -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 32, i32 8, i1 false) -; CHECK-NOT: store -; CHECK: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[REF_IDX:%.*]] = alloca [8 x i8] +; CHECK-NEXT: [[LEFT_MVD:%.*]] = alloca [8 x %struct.MV] +; CHECK-NEXT: [[UP_MVD:%.*]] = alloca [8 x %struct.MV] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 7 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 6 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 5 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 4 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 3 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 2 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 1 +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 0 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr [8 x %struct.MV], [8 x 
%struct.MV]* [[UP_MVD]], i32 0, i32 7, i32 0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP41]], i8 -1, i64 8, i32 1, i1 false) +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 7, i32 1 +; CHECK-NEXT: [[TMP57:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 6, i32 0 +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 6, i32 1 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 5, i32 0 +; CHECK-NEXT: [[TMP74:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 5, i32 1 +; CHECK-NEXT: [[TMP85:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 4, i32 0 +; CHECK-NEXT: [[TMP88:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 4, i32 1 +; CHECK-NEXT: [[TMP99:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 3, i32 0 +; CHECK-NEXT: [[TMP102:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 3, i32 1 +; CHECK-NEXT: [[TMP113:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 2, i32 0 +; CHECK-NEXT: [[TMP116:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 2, i32 1 +; CHECK-NEXT: [[TMP127:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 1, i32 0 +; CHECK-NEXT: [[TMP130:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 1, i32 1 +; CHECK-NEXT: [[TMP141:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP144:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0, i32 1 +; CHECK-NEXT: [[TMP148:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 7, i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* 
[[TMP141]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP0]], i8 0, i64 32, i32 8, i1 false) +; CHECK-NEXT: [[TMP151:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 7, i32 1 +; CHECK-NEXT: [[TMP162:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 6, i32 0 +; CHECK-NEXT: [[TMP165:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 6, i32 1 +; CHECK-NEXT: [[TMP176:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 5, i32 0 +; CHECK-NEXT: [[TMP179:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 5, i32 1 +; CHECK-NEXT: [[TMP190:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 4, i32 0 +; CHECK-NEXT: [[TMP193:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 4, i32 1 +; CHECK-NEXT: [[TMP204:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 3, i32 0 +; CHECK-NEXT: [[TMP207:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 3, i32 1 +; CHECK-NEXT: [[TMP218:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 2, i32 0 +; CHECK-NEXT: [[TMP221:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 2, i32 1 +; CHECK-NEXT: [[TMP232:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 1, i32 0 +; CHECK-NEXT: [[TMP235:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 1, i32 1 +; CHECK-NEXT: [[TMP246:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP249:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0, i32 1 +; CHECK-NEXT: [[UP_MVD252:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0 +; CHECK-NEXT: [[LEFT_MVD253:%.*]] = 
getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP246]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 32, i32 8, i1 false) +; CHECK-NEXT: call void @foo(%struct.MV* [[UP_MVD252]], %struct.MV* [[LEFT_MVD253]], i8* [[TMP41]]) #0 +; CHECK-NEXT: ret void +; +entry: + %ref_idx = alloca [8 x i8] ; <[8 x i8]*> [#uses=8] + %left_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] + %up_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] + %tmp20 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 7 ; [#uses=1] + store i8 -1, i8* %tmp20, align 1 + %tmp23 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 6 ; [#uses=1] + store i8 -1, i8* %tmp23, align 1 + %tmp26 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 5 ; [#uses=1] + store i8 -1, i8* %tmp26, align 1 + %tmp29 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 4 ; [#uses=1] + store i8 -1, i8* %tmp29, align 1 + %tmp32 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 3 ; [#uses=1] + store i8 -1, i8* %tmp32, align 1 + %tmp35 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 2 ; [#uses=1] + store i8 -1, i8* %tmp35, align 1 + %tmp38 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 1 ; [#uses=1] + store i8 -1, i8* %tmp38, align 1 + %tmp41 = getelementptr [8 x i8], [8 x i8]* %ref_idx, i32 0, i32 0 ; [#uses=2] + store i8 -1, i8* %tmp41, align 1 + %tmp43 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 0 ; [#uses=1] + store i16 0, i16* %tmp43, align 2 + %tmp46 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 7, i32 1 ; [#uses=1] + store i16 0, i16* %tmp46, align 2 + %tmp57 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 0 ; [#uses=1] + store i16 0, i16* %tmp57, align 2 + %tmp60 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 6, i32 1 ; [#uses=1] + 
store i16 0, i16* %tmp60, align 2 + %tmp71 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 0 ; [#uses=1] + store i16 0, i16* %tmp71, align 2 + %tmp74 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 5, i32 1 ; [#uses=1] + store i16 0, i16* %tmp74, align 2 + %tmp85 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 0 ; [#uses=1] + store i16 0, i16* %tmp85, align 2 + %tmp88 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 4, i32 1 ; [#uses=1] + store i16 0, i16* %tmp88, align 2 + %tmp99 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 0 ; [#uses=1] + store i16 0, i16* %tmp99, align 2 + %tmp102 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 3, i32 1 ; [#uses=1] + store i16 0, i16* %tmp102, align 2 + %tmp113 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 0 ; [#uses=1] + store i16 0, i16* %tmp113, align 2 + %tmp116 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 2, i32 1 ; [#uses=1] + store i16 0, i16* %tmp116, align 2 + %tmp127 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 0 ; [#uses=1] + store i16 0, i16* %tmp127, align 2 + %tmp130 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 1, i32 1 ; [#uses=1] + store i16 0, i16* %tmp130, align 2 + %tmp141 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 0 ; [#uses=1] + store i16 0, i16* %tmp141, align 8 + %tmp144 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0, i32 1 ; [#uses=1] + store i16 0, i16* %tmp144, align 2 + %tmp148 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 0 ; [#uses=1] + store i16 0, i16* %tmp148, align 2 + %tmp151 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 7, i32 1 ; [#uses=1] + store i16 0, i16* %tmp151, align 2 + %tmp162 = 
getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 0 ; [#uses=1] + store i16 0, i16* %tmp162, align 2 + %tmp165 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 6, i32 1 ; [#uses=1] + store i16 0, i16* %tmp165, align 2 + %tmp176 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 0 ; [#uses=1] + store i16 0, i16* %tmp176, align 2 + %tmp179 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 5, i32 1 ; [#uses=1] + store i16 0, i16* %tmp179, align 2 + %tmp190 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 0 ; [#uses=1] + store i16 0, i16* %tmp190, align 2 + %tmp193 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 4, i32 1 ; [#uses=1] + store i16 0, i16* %tmp193, align 2 + %tmp204 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 0 ; [#uses=1] + store i16 0, i16* %tmp204, align 2 + %tmp207 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 3, i32 1 ; [#uses=1] + store i16 0, i16* %tmp207, align 2 + %tmp218 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 0 ; [#uses=1] + store i16 0, i16* %tmp218, align 2 + %tmp221 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 2, i32 1 ; [#uses=1] + store i16 0, i16* %tmp221, align 2 + %tmp232 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 0 ; [#uses=1] + store i16 0, i16* %tmp232, align 2 + %tmp235 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 1, i32 1 ; [#uses=1] + store i16 0, i16* %tmp235, align 2 + %tmp246 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 0 ; [#uses=1] + store i16 0, i16* %tmp246, align 8 + %tmp249 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0, i32 1 ; [#uses=1] + store i16 0, i16* %tmp249, align 2 + %up_mvd252 = 
getelementptr [8 x %struct.MV], [8 x %struct.MV]* %up_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] + %left_mvd253 = getelementptr [8 x %struct.MV], [8 x %struct.MV]* %left_mvd, i32 0, i32 0 ; <%struct.MV*> [#uses=1] + call void @foo( %struct.MV* %up_mvd252, %struct.MV* %left_mvd253, i8* %tmp41 ) nounwind + ret void + } declare void @foo(%struct.MV*, %struct.MV*, i8*) @@ -166,49 +234,75 @@ ; Store followed by memset. define void @test3(i32* nocapture %P) nounwind ssp { -entry: - %arrayidx = getelementptr inbounds i32, i32* %P, i64 1 - store i32 0, i32* %arrayidx, align 4 - %add.ptr = getelementptr inbounds i32, i32* %P, i64 2 - %0 = bitcast i32* %add.ptr to i8* - tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false) - ret void ; CHECK-LABEL: @test3( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %P, i64 1 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* %P, i64 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 15, i32 4, i1 false) +; CHECK-NEXT: ret void +; +entry: + %arrayidx = getelementptr inbounds i32, i32* %P, i64 1 + store i32 0, i32* %arrayidx, align 4 + %add.ptr = getelementptr inbounds i32, i32* %P, i64 2 + %0 = bitcast i32* %add.ptr to i8* + tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false) + ret void } ; store followed by memset, different offset scenario define void @test4(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* %P, i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* %P to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, 
i64 15, i32 4, i1 false) +; CHECK-NEXT: ret void +; entry: store i32 0, i32* %P, align 4 %add.ptr = getelementptr inbounds i32, i32* %P, i64 1 %0 = bitcast i32* %add.ptr to i8* tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false) ret void -; CHECK-LABEL: @test4( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false) } declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind ; Memset followed by store. define void @test5(i32* nocapture %P) nounwind ssp { -entry: - %add.ptr = getelementptr inbounds i32, i32* %P, i64 2 - %0 = bitcast i32* %add.ptr to i8* - tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false) - %arrayidx = getelementptr inbounds i32, i32* %P, i64 1 - store i32 0, i32* %arrayidx, align 4 - ret void ; CHECK-LABEL: @test5( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 15, i32 4, i1 false) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* %P, i64 2 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %P, i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 15, i32 4, i1 false) +; CHECK-NEXT: ret void +; +entry: + %add.ptr = getelementptr inbounds i32, i32* %P, i64 2 + %0 = bitcast i32* %add.ptr to i8* + tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 11, i32 1, i1 false) + %arrayidx = getelementptr inbounds i32, i32* %P, i64 1 + store i32 0, i32* %arrayidx, align 4 + ret void } ;; Memset followed by memset. 
define void @test6(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test6( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* %P to i8* +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* %P, i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* %P to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP2]], i8 0, i64 24, i32 1, i1 false) +; CHECK-NEXT: ret void +; entry: %0 = bitcast i32* %P to i8* tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i32 1, i1 false) @@ -216,13 +310,20 @@ %1 = bitcast i32* %add.ptr to i8* tail call void @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 12, i32 1, i1 false) ret void -; CHECK-LABEL: @test6( -; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false) } ; More aggressive heuristic ; rdar://9892684 define void @test7(i32* nocapture %c) nounwind optsize { +; CHECK-LABEL: @test7( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* %c, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* %c, i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* %c, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* %c, i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* %c to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP5]], i8 -1, i64 20, i32 4, i1 false) +; CHECK-NEXT: ret void +; store i32 -1, i32* %c, align 4 %1 = getelementptr inbounds i32, i32* %c, i32 1 store i32 -1, i32* %1, align 4 @@ -232,26 +333,33 @@ store i32 -1, i32* %3, align 4 %4 = getelementptr inbounds i32, i32* %c, i32 4 store i32 -1, i32* %4, align 4 -; CHECK-LABEL: @test7( -; CHECK: call void @llvm.memset.p0i8.i64(i8* %5, i8 -1, i64 20, i32 4, i1 false) ret void } %struct.test8 = type { [4 x i32] } define void @test8() { +; CHECK-LABEL: @test8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MEMTMP:%.*]] = alloca %struct.test8, align 16 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.test8* 
[[MEMTMP]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> , <4 x i32>* [[TMP0]], align 16 +; CHECK-NEXT: ret void +; entry: %memtmp = alloca %struct.test8, align 16 %0 = bitcast %struct.test8* %memtmp to <4 x i32>* store <4 x i32> , <4 x i32>* %0, align 16 ret void -; CHECK-LABEL: @test8( -; CHECK: store <4 x i32> , <4 x i32>* %0, align 16 } @test9buf = internal unnamed_addr global [16 x i64] zeroinitializer, align 16 define void @test9() nounwind { +; CHECK-LABEL: @test9( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i32 16, i1 false) +; CHECK-NEXT: ret void +; store i8 -1, i8* bitcast ([16 x i64]* @test9buf to i8*), align 16 store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 1), align 1 store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 2), align 2 @@ -269,24 +377,31 @@ store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 14), align 2 store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 15), align 1 ret void -; CHECK-LABEL: @test9( -; CHECK: call void @llvm.memset.p0i8.i64(i8* bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i32 16, i1 false) } ; PR19092 define void @test10(i8* nocapture %P) nounwind { +; CHECK-LABEL: @test10( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false) +; CHECK-NEXT: ret void +; tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false) tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i32 1, i1 false) ret void -; CHECK-LABEL: @test10( -; CHECK-NOT: memset -; CHECK: call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false) -; CHECK-NOT: memset -; CHECK: ret void } ; Memset followed by odd store. 
define void @test11(i32* nocapture %P) nounwind ssp { +; CHECK-LABEL: @test11( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* %P, i64 3 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %P, i64 0 +; CHECK-NEXT: [[ARRAYIDX_CAST:%.*]] = bitcast i32* [[ARRAYIDX]] to i96* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i96* [[ARRAYIDX_CAST]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 1, i64 23, i32 4, i1 false) +; CHECK-NEXT: ret void +; entry: %add.ptr = getelementptr inbounds i32, i32* %P, i64 3 %0 = bitcast i32* %add.ptr to i8* @@ -295,7 +410,4 @@ %arrayidx.cast = bitcast i32* %arrayidx to i96* store i96 310698676526526814092329217, i96* %arrayidx.cast, align 4 ret void -; CHECK-LABEL: @test11( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* %1, i8 1, i64 23, i32 4, i1 false) } Index: test/Transforms/MemCpyOpt/invariant.start.ll =================================================================== --- test/Transforms/MemCpyOpt/invariant.start.ll +++ test/Transforms/MemCpyOpt/invariant.start.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; MemCpy optimizations should take place even in presence of invariant.start ; RUN: opt < %s -basicaa -memcpyopt -dse -S | FileCheck %s @@ -18,30 +19,32 @@ ; The intermediate alloca and one of the memcpy's should be eliminated, the ; other should be transformed to a memmove. 
define void @test1(i8* %P, i8* %Q) nounwind { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[MEMTMP:%.*]] = alloca %0, align 16 +; CHECK-NEXT: [[R:%.*]] = bitcast %0* [[MEMTMP]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[R]], i8* %P, i32 32, i32 16, i1 false) +; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %P) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* [[R]], i32 32, i32 16, i1 false) +; CHECK-NEXT: ret void +; %memtmp = alloca %0, align 16 %R = bitcast %0* %memtmp to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false) %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %P) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false) ret void -; CHECK-LABEL: @test1( -; CHECK-NEXT: %memtmp = alloca %0, align 16 -; CHECK-NEXT: %R = bitcast %0* %memtmp to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false) -; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %P) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false) -; CHECK-NEXT: ret void } ; The invariant.start intrinsic does not inhibit tranforming the memcpy to a ; memset. 
define void @test2(i8* %dst1, i8* %dst2, i8 %c) { -; CHECK-LABEL: define void @test2( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) -; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %dst1) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 8, i1 false) -; CHECK-NEXT: ret void +; CHECK-LABEL: @test2( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) +; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %dst1) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 8, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %dst1) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 8, i1 false) Index: test/Transforms/MemCpyOpt/lifetime.ll =================================================================== --- test/Transforms/MemCpyOpt/lifetime.ll +++ test/Transforms/MemCpyOpt/lifetime.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -O1 -S | FileCheck %s ; performCallSlotOptzn in MemCpy should not exchange the calls to @@ -8,10 +9,13 @@ declare void @llvm.lifetime.end(i64, i8* nocapture) #1 define void @_ZN4CordC2EOS_(i8* nocapture dereferenceable(16) %arg1) { +; CHECK-LABEL: @_ZN4CordC2EOS_( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* %arg1, i64 7 +; CHECK-NEXT: store i8 0, i8* [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX]], align 1 +; CHECK-NEXT: ret void +; bb: -; CHECK-LABEL: @_ZN4CordC2EOS_ -; CHECK-NOT: call void @llvm.lifetime.start -; CHECK: ret void %tmp = alloca [8 x i8], align 8 %tmp5 = bitcast [8 x i8]* %tmp to i8* call void @llvm.lifetime.start(i64 16, i8* %tmp5) Index: test/Transforms/MemCpyOpt/loadstore-sret.ll 
=================================================================== --- test/Transforms/MemCpyOpt/loadstore-sret.ll +++ test/Transforms/MemCpyOpt/loadstore-sret.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S < %s -basicaa -memcpyopt | FileCheck %s ; @@ -6,19 +7,22 @@ %"class.std::auto_ptr" = type { i32* } -; CHECK-LABEL: @_Z3foov( define void @_Z3foov(%"class.std::auto_ptr"* noalias nocapture sret %agg.result) ssp { +; CHECK-LABEL: @_Z3foov( +; CHECK-NEXT: _ZNSt8auto_ptrIiED1Ev.exit: +; CHECK-NEXT: [[TEMP_LVALUE:%.*]] = alloca %"class.std::auto_ptr", align 8 +; CHECK-NEXT: call void @_Z3barv(%"class.std::auto_ptr"* sret %agg.result) +; CHECK-NEXT: [[TMP_I_I:%.*]] = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* [[TEMP_LVALUE]], i64 0, i32 0 +; CHECK-NEXT: [[TMP_I_I4:%.*]] = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %agg.result, i64 0, i32 0 +; CHECK-NEXT: ret void +; _ZNSt8auto_ptrIiED1Ev.exit: %temp.lvalue = alloca %"class.std::auto_ptr", align 8 -; CHECK: call void @_Z3barv(%"class.std::auto_ptr"* sret %agg.result) call void @_Z3barv(%"class.std::auto_ptr"* sret %temp.lvalue) %tmp.i.i = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %temp.lvalue, i64 0, i32 0 -; CHECK-NOT: load %tmp2.i.i = load i32*, i32** %tmp.i.i, align 8 %tmp.i.i4 = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %agg.result, i64 0, i32 0 -; CHECK-NOT: store store i32* %tmp2.i.i, i32** %tmp.i.i4, align 8 -; CHECK: ret void ret void } Index: test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll =================================================================== --- test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll +++ test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll @@ -1,27 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basicaa -memcpyopt -instcombine -S < 
%s | FileCheck %s target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define void @foo([8 x i64]* noalias nocapture sret dereferenceable(64) %sret) { -entry-block: - %a = alloca [8 x i64], align 8 - %a.cast = bitcast [8 x i64]* %a to i8* - call void @llvm.lifetime.start(i64 64, i8* %a.cast) - call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false) - %sret.cast = bitcast [8 x i64]* %sret to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false) - call void @llvm.lifetime.end(i64 64, i8* %a.cast) - ret void - ; CHECK-LABEL: @foo( -; CHECK: %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %[[sret_cast]], i8 0, i64 64 -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void +; CHECK-NEXT: entry-block: +; CHECK-NEXT: [[SRET1:%.*]] = bitcast [8 x i64]* %sret to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRET1]], i8 0, i64 64, i32 8, i1 false) +; CHECK-NEXT: ret void +; +entry-block: + %a = alloca [8 x i64], align 8 + %a.cast = bitcast [8 x i64]* %a to i8* + call void @llvm.lifetime.start(i64 64, i8* %a.cast) + call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false) + %sret.cast = bitcast [8 x i64]* %sret to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false) + call void @llvm.lifetime.end(i64 64, i8* %a.cast) + ret void + } define void @bar([8 x i64]* noalias nocapture sret dereferenceable(64) %sret, [8 x i64]* noalias nocapture dereferenceable(64) %out) { +; CHECK-LABEL: @bar( +; CHECK-NEXT: entry-block: +; CHECK-NEXT: [[A:%.*]] = alloca [8 x i64], align 8 +; CHECK-NEXT: [[A_CAST:%.*]] = bitcast [8 x i64]* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start(i64 64, i8* [[A_CAST]]) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A_CAST]], i8 0, i64 64, i32 8, i1 false) +; CHECK-NEXT: [[SRET_CAST:%.*]] = bitcast [8 x 
i64]* %sret to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRET_CAST]], i8 0, i64 64, i32 8, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A_CAST]], i8 42, i64 32, i32 8, i1 false) +; CHECK-NEXT: [[OUT_CAST:%.*]] = bitcast [8 x i64]* %out to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[OUT_CAST]], i8* [[A_CAST]], i64 64, i32 8, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end(i64 64, i8* [[A_CAST]]) +; CHECK-NEXT: ret void +; entry-block: %a = alloca [8 x i64], align 8 %a.cast = bitcast [8 x i64]* %a to i8* @@ -35,17 +51,6 @@ call void @llvm.lifetime.end(i64 64, i8* %a.cast) ret void -; CHECK-LABEL: @bar( -; CHECK: %[[a:[^=]+]] = alloca [8 x i64] -; CHECK: %[[a_cast:[^=]+]] = bitcast [8 x i64]* %[[a]] to i8* -; CHECK: call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 0, i64 64 -; CHECK: %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8* -; CHECK: call void @llvm.memset.p0i8.i64(i8* %[[sret_cast]], i8 0, i64 64 -; CHECK: call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 42, i64 32 -; CHECK: %[[out_cast:[^=]+]] = bitcast [8 x i64]* %out to i8* -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[out_cast]], i8* %[[a_cast]], i64 64 -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind Index: test/Transforms/MemCpyOpt/memcpy-to-memset.ll =================================================================== --- test/Transforms/MemCpyOpt/memcpy-to-memset.ll +++ test/Transforms/MemCpyOpt/memcpy-to-memset.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S < %s | FileCheck %s @cst = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4 @@ -6,14 +7,18 @@ declare void @foo(i32*) nounwind define void @test1() nounwind { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[ARR:%.*]] = alloca [3 x i32], align 4 +; CHECK-NEXT: [[ARR_I8:%.*]] = bitcast [3 x i32]* [[ARR]] to i8* +; CHECK-NEXT: 
call void @llvm.memset.p0i8.i64(i8* [[ARR_I8]], i8 -1, i64 12, i32 4, i1 false) +; CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[ARR]], i64 0, i64 0 +; CHECK-NEXT: call void @foo(i32* [[ARRAYDECAY]]) #{{[0-9]+}} +; CHECK-NEXT: ret void +; %arr = alloca [3 x i32], align 4 %arr_i8 = bitcast [3 x i32]* %arr to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arr_i8, i8* bitcast ([3 x i32]* @cst to i8*), i64 12, i32 4, i1 false) %arraydecay = getelementptr inbounds [3 x i32], [3 x i32]* %arr, i64 0, i64 0 call void @foo(i32* %arraydecay) nounwind ret void -; CHECK-LABEL: @test1( -; CHECK: call void @llvm.memset -; CHECK-NOT: call void @llvm.memcpy -; CHECK: ret void } Index: test/Transforms/MemCpyOpt/memcpy-undef.ll =================================================================== --- test/Transforms/MemCpyOpt/memcpy-undef.ll +++ test/Transforms/MemCpyOpt/memcpy-undef.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -5,7 +6,18 @@ %struct.foo = type { i8, [7 x i8], i32 } +; Check that the memcpy is removed.
define i32 @test1(%struct.foo* nocapture %foobie) nounwind noinline ssp uwtable { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[BLETCH_SROA_1:%.*]] = alloca [7 x i8], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 0 +; CHECK-NEXT: store i8 98, i8* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 1, i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[BLETCH_SROA_1]], i64 0, i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 2 +; CHECK-NEXT: store i32 20, i32* [[TMP4]], align 4 +; CHECK-NEXT: ret i32 undef +; %bletch.sroa.1 = alloca [7 x i8], align 1 %1 = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 0 store i8 98, i8* %1, align 4 @@ -15,30 +27,29 @@ %4 = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 2 store i32 20, i32* %4, align 4 ret i32 undef - -; Check that the memcpy is removed. -; CHECK-LABEL: @test1( -; CHECK-NOT: call void @llvm.memcpy } +; Check that the memcpy is removed. define void @test2(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable { - call void @llvm.lifetime.start(i64 8, i8* %in) - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false) - ret void - -; Check that the memcpy is removed. ; CHECK-LABEL: @test2( -; CHECK-NOT: call void @llvm.memcpy -} - -define void @test3(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable { - call void @llvm.lifetime.start(i64 4, i8* %in) +; CHECK-NEXT: call void @llvm.lifetime.start(i64 8, i8* %in) +; CHECK-NEXT: ret void +; + call void @llvm.lifetime.start(i64 8, i8* %in) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false) ret void +} ; Check that the memcpy is not removed. 
+define void @test3(i8* sret noalias nocapture %out, i8* %in) nounwind noinline ssp uwtable { ; CHECK-LABEL: @test3( -; CHECK: call void @llvm.memcpy +; CHECK-NEXT: call void @llvm.lifetime.start(i64 4, i8* %in) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.lifetime.start(i64 4, i8* %in) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false) + ret void } declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind Index: test/Transforms/MemCpyOpt/memcpy.ll =================================================================== --- test/Transforms/MemCpyOpt/memcpy.ll +++ test/Transforms/MemCpyOpt/memcpy.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -dse -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" @@ -7,6 +8,16 @@ %1 = type { i32, i32 } define void @test1(%0* sret %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = alloca %0 +; CHECK-NEXT: [[TMP5:%.*]] = fsub x86_fp80 0xK80000000000000000000, %z.1 +; CHECK-NEXT: call void @ccoshl(%0* sret [[TMP2]], x86_fp80 [[TMP5]], x86_fp80 %z.0) #{{[0-9]+}} +; CHECK-NEXT: [[TMP219:%.*]] = bitcast %0* [[TMP2]] to i8* +; CHECK-NEXT: [[AGG_RESULT21:%.*]] = bitcast %0* %agg.result to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT21]], i8* [[TMP219]], i32 32, i32 16, i1 false) +; CHECK-NEXT: ret void +; entry: %tmp2 = alloca %0 %memtmp = alloca %0, align 16 @@ -22,28 +33,24 @@ ; Check that one of the memcpy's are removed. ;; FIXME: PR 8643 We should be able to eliminate the last memcpy here.
-; CHECK-LABEL: @test1( -; CHECK: call void @ccoshl -; CHECK: call void @llvm.memcpy -; CHECK-NOT: llvm.memcpy -; CHECK: ret void } -declare void @ccoshl(%0* nocapture sret, x86_fp80, x86_fp80) nounwind +declare void @ccoshl(%0* nocapture sret, x86_fp80, x86_fp80) nounwind ; The intermediate alloca and one of the memcpy's should be eliminated, the ; other should be related with a memmove. define void @test2(i8* %P, i8* %Q) nounwind { +; CHECK-LABEL: @test2( +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* %Q, i8* %P, i32 32, i32 16, i1 false) +; CHECK-NEXT: ret void +; %memtmp = alloca %0, align 16 %R = bitcast %0* %memtmp to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false) ret void - -; CHECK-LABEL: @test2( -; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P -; CHECK-NEXT: ret void + } @@ -52,28 +59,31 @@ @x = external global %0 define void @test3(%0* noalias sret %agg.result) nounwind { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[AGG_RESULT1:%.*]] = bitcast %0* %agg.result to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT1]], i8* bitcast (%0* @x to i8*), i32 32, i32 16, i1 false) +; CHECK-NEXT: ret void +; %x.0 = alloca %0 %x.01 = bitcast %0* %x.0 to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x.01, i8* bitcast (%0* @x to i8*), i32 32, i32 16, i1 false) %agg.result2 = bitcast %0* %agg.result to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result2, i8* %x.01, i32 32, i32 16, i1 false) ret void -; CHECK-LABEL: @test3( -; CHECK-NEXT: %agg.result1 = bitcast -; CHECK-NEXT: call void @llvm.memcpy -; CHECK-NEXT: ret void } ; PR8644 define void @test4(i8 *%P) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: call void @test4a(i8* byval align 1 %P) +; CHECK-NEXT: ret void +; %A = alloca %1 %a = bitcast %1* %A to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false) call void @test4a(i8* align 1 
byval %a) ret void -; CHECK-LABEL: @test4( -; CHECK-NEXT: call void @test4a( } declare void @test4a(i8* align 1 byval) @@ -89,6 +99,16 @@ ; rdar://8713376 - This memcpy can't be eliminated. define i32 @test5(i32 %x) nounwind ssp { +; CHECK-LABEL: @test5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[Y:%.*]] = alloca %struct.S, align 16 +; CHECK-NEXT: [[TMP:%.*]] = bitcast %struct.S* [[Y]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP]], i8* bitcast (%struct.S* @sS to i8*), i64 32, i32 16, i1 false) +; CHECK-NEXT: [[A:%.*]] = getelementptr %struct.S, %struct.S* [[Y]], i64 0, i32 1, i64 0 +; CHECK-NEXT: store i8 4, i8* [[A]] +; CHECK-NEXT: call void @test5a(%struct.S* byval align 16 [[Y]]) +; CHECK-NEXT: ret i32 0 +; entry: %y = alloca %struct.S, align 16 %tmp = bitcast %struct.S* %y to i8* @@ -97,17 +117,15 @@ store i8 4, i8* %a call void @test5a(%struct.S* align 16 byval %y) ret i32 0 - ; CHECK-LABEL: @test5( - ; CHECK: store i8 4 - ; CHECK: call void @test5a(%struct.S* byval align 16 %y) } ;; Noop memcpy should be zapped. 
define void @test6(i8 *%P) { - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %P, i64 8, i32 4, i1 false) - ret void ; CHECK-LABEL: @test6( -; CHECK-NEXT: ret void +; CHECK-NEXT: ret void +; + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %P, i64 8, i32 4, i1 false) + ret void } @@ -116,6 +134,11 @@ %struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } define i32 @test7(%struct.p* nocapture align 8 byval %q) nounwind ssp { +; CHECK-LABEL: @test7( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @g(%struct.p* byval align 8 %q) #{{[0-9]+}} +; CHECK-NEXT: ret i32 [[CALL]] +; entry: %agg.tmp = alloca %struct.p, align 4 %tmp = bitcast %struct.p* %agg.tmp to i8* @@ -123,8 +146,6 @@ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false) %call = call i32 @g(%struct.p* align 8 byval %agg.tmp) nounwind ret i32 %call -; CHECK-LABEL: @test7( -; CHECK: call i32 @g(%struct.p* byval align 8 %q) [[NUW:#[0-9]+]] } declare i32 @g(%struct.p* align 8 byval) @@ -136,8 +157,9 @@ @test8.str = internal constant [7 x i8] c"ABCDEF\00" define void @test8() { -; CHECK: test8 -; CHECK-NOT: memcpy +; CHECK-LABEL: @test8( +; CHECK-NEXT: ret void +; %A = tail call i8* @malloc(i32 10) %B = getelementptr inbounds i8, i8* %A, i64 2 tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %B, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @test8.str, i64 0, i64 0), i32 7, i32 1, i1 false) @@ -145,7 +167,6 @@ %D = getelementptr inbounds i8, i8* %C, i64 2 tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %D, i8* %B, i32 7, i32 1, i1 false) ret void -; CHECK: ret void } declare noalias i8* @malloc(i32) @@ -154,11 +175,14 @@ %struct.big = type { [50 x i32] } define void @test9_addrspacecast() nounwind ssp uwtable { -entry: ; CHECK-LABEL: @test9_addrspacecast( -; CHECK: f1 -; CHECK-NOT: memcpy -; CHECK: f2 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca %struct.big, align 4 +; CHECK-NEXT: call void @f1(%struct.big* sret [[B]]) +; CHECK-NEXT: call
void @f2(%struct.big* [[B]]) +; CHECK-NEXT: ret void +; +entry: %b = alloca %struct.big, align 4 %tmp = alloca %struct.big, align 4 call void @f1(%struct.big* sret %tmp) @@ -170,11 +194,14 @@ } define void @test9() nounwind ssp uwtable { +; CHECK-LABEL: @test9( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca %struct.big, align 4 +; CHECK-NEXT: call void @f1(%struct.big* sret [[B]]) +; CHECK-NEXT: call void @f2(%struct.big* [[B]]) +; CHECK-NEXT: ret void +; entry: -; CHECK: test9 -; CHECK: f1 -; CHECK-NOT: memcpy -; CHECK: f2 %b = alloca %struct.big, align 4 %tmp = alloca %struct.big, align 4 call void @f1(%struct.big* sret %tmp) @@ -193,6 +220,15 @@ declare void @foo(i32* noalias nocapture) define void @test10(%opaque* noalias nocapture sret %x, i32 %y) { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 %y, i32* [[A]] +; CHECK-NEXT: call void @foo(i32* noalias nocapture [[A]]) +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[D:%.*]] = bitcast %opaque* %x to i32* +; CHECK-NEXT: store i32 [[C]], i32* [[D]] +; CHECK-NEXT: ret void +; %a = alloca i32, align 4 store i32 %y, i32* %a call void @foo(i32* noalias nocapture %a) @@ -204,8 +240,3 @@ declare void @f1(%struct.big* nocapture sret) declare void @f2(%struct.big*) - -; CHECK: attributes [[NUW]] = { nounwind } -; CHECK: attributes #1 = { argmemonly nounwind } -; CHECK: attributes #2 = { nounwind ssp } -; CHECK: attributes #3 = { nounwind ssp uwtable } Index: test/Transforms/MemCpyOpt/memmove.ll =================================================================== --- test/Transforms/MemCpyOpt/memmove.ll +++ test/Transforms/MemCpyOpt/memmove.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s ; These memmoves should get optimized to memcpys. 
@@ -7,9 +8,15 @@ declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind define i8* @test1(i8* nocapture %src) nounwind { -entry: ; CHECK-LABEL: @test1( -; CHECK: call void @llvm.memcpy +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MALLOCCALL:%.*]] = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 13) to i32)) +; CHECK-NEXT: [[CALL3:%.*]] = bitcast i8* [[MALLOCCALL]] to [13 x i8]* +; CHECK-NEXT: [[CALL3_SUB:%.*]] = getelementptr inbounds [13 x i8], [13 x i8]* [[CALL3]], i64 0, i64 0 +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CALL3_SUB]], i8* %src, i64 13, i32 1, i1 false) +; CHECK-NEXT: ret i8* [[CALL3_SUB]] +; +entry: %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 13) to i32)) %call3 = bitcast i8* %malloccall to [13 x i8]* @@ -21,9 +28,13 @@ define void @test2(i8* %P) nounwind { -entry: ; CHECK-LABEL: @test2( -; CHECK: call void @llvm.memcpy +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8* %P, i64 16 +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* [[ADD_PTR]], i64 16, i32 1, i1 false) +; CHECK-NEXT: ret void +; +entry: %add.ptr = getelementptr i8, i8* %P, i64 16 tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i32 1, i1 false) ret void @@ -31,9 +42,13 @@ ; This cannot be optimize because the src/dst really do overlap. 
define void @test3(i8* %P) nounwind { -entry: ; CHECK-LABEL: @test3( -; CHECK: call void @llvm.memmove +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8* %P, i64 16 +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* [[ADD_PTR]], i64 17, i32 1, i1 false) +; CHECK-NEXT: ret void +; +entry: %add.ptr = getelementptr i8, i8* %P, i64 16 tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i32 1, i1 false) ret void Index: test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll =================================================================== --- test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll +++ test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll @@ -1,126 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basicaa -memcpyopt -S %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -; CHECK-LABEL: define void @test -; CHECK-DAG: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size -; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size -; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size -; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test(i8* %src, i64 %src_size, i8* %dst, i64 %dst_size, i8 %c) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 %dst_size, %src_size +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 %dst_size, %src_size +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* %dst, i64 %src_size +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP4]], i8 %c, i64 [[TMP3]], i32 1, i1 false) +; CHECK-NEXT: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_i32_i64 -; CHECK-DAG: [[DSTSIZE:%[0-9]+]] = zext i32 %dst_size to i64 -; CHECK-DAG: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size -; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 [[DSTSIZE]], %src_size -; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 [[DSTSIZE]], %src_size -; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_different_types_i32_i64(i8* %dst, i8* %src, i32 %dst_size, i64 %src_size, i8 %c) { +; CHECK-LABEL: @test_different_types_i32_i64( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 %dst_size to i64 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[TMP1]], %src_size +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], %src_size +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i64 0, i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* %dst, i64 %src_size +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP5]], i8 %c, i64 [[TMP4]], i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_i128_i32 -; CHECK-DAG: [[SRCSIZE:%[0-9]+]] = zext i32 %src_size to i128 -; CHECK-DAG: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i128 [[SRCSIZE]] -; 
CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i128 %dst_size, [[SRCSIZE]] -; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i128 %dst_size, [[SRCSIZE]] -; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i128 0, i128 [[SIZEDIFF]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* [[DST]], i8 %c, i128 [[SIZE]], i32 1, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_different_types_i128_i32(i8* %dst, i8* %src, i128 %dst_size, i32 %src_size, i8 %c) { +; CHECK-LABEL: @test_different_types_i128_i32( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 %src_size to i128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i128 %dst_size, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i128 %dst_size, [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i128 0, i128 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* %dst, i128 [[TMP1]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* [[TMP5]], i8 %c, i128 [[TMP4]], i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i128(i8* %dst, i8 %c, i128 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_i32_i128 -; CHECK-DAG: [[DSTSIZE:%[0-9]+]] = zext i32 %dst_size to i128 -; CHECK-DAG: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i128 %src_size -; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i128 [[DSTSIZE]], %src_size -; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i128 [[DSTSIZE]], %src_size -; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i128 0, i128 [[SIZEDIFF]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* [[DST]], i8 %c, i128 [[SIZE]], i32 1, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i32 1, i1 false) -; CHECK-NEXT: ret void define void 
@test_different_types_i32_i128(i8* %dst, i8* %src, i32 %dst_size, i128 %src_size, i8 %c) { +; CHECK-LABEL: @test_different_types_i32_i128( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 %dst_size to i128 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i128 [[TMP1]], %src_size +; CHECK-NEXT: [[TMP3:%.*]] = sub i128 [[TMP1]], %src_size +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i128 0, i128 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* %dst, i128 %src_size +; CHECK-NEXT: call void @llvm.memset.p0i8.i128(i8* [[TMP5]], i8 %c, i128 [[TMP4]], i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_i64_i32 -; CHECK-DAG: [[SRCSIZE:%[0-9]+]] = zext i32 %src_size to i64 -; CHECK-DAG: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 [[SRCSIZE]] -; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, [[SRCSIZE]] -; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, [[SRCSIZE]] -; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_different_types_i64_i32(i8* %dst, i8* %src, i64 %dst_size, i32 %src_size, i8 %c) { +; CHECK-LABEL: @test_different_types_i64_i32( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 %src_size to i64 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 %dst_size, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 %dst_size, [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i64 0, i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* %dst, i64 [[TMP1]] +; CHECK-NEXT: 
call void @llvm.memset.p0i8.i64(i8* [[TMP5]], i8 %c, i64 [[TMP4]], i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_align_same -; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 8, i1 false) define void @test_align_same(i8* %src, i8* %dst, i64 %dst_size) { +; CHECK-LABEL: @test_align_same( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 %dst_size, 80 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 %dst_size, 80 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* %dst, i64 80 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP4]], i8 0, i64 [[TMP3]], i32 8, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 8, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_align_min -; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 4, i1 false) define void @test_align_min(i8* %src, i8* %dst, i64 %dst_size) { +; CHECK-LABEL: @test_align_min( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 %dst_size, 36 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 %dst_size, 36 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* %dst, i64 36 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP4]], i8 0, i64 [[TMP3]], i32 4, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 36, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void 
@llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 8, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 36, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_align_memcpy -; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 {{.*}}, i32 8, i1 false) define void @test_align_memcpy(i8* %src, i8* %dst, i64 %dst_size) { +; CHECK-LABEL: @test_align_memcpy( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 %dst_size, 80 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 %dst_size, 80 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* %dst, i64 80 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP4]], i8 0, i64 [[TMP3]], i32 8, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 8, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 8, i1 false) ret void } -; CHECK-LABEL: define void @test_non_i8_dst_type -; CHECK-NEXT: %dst = bitcast i64* %dst_pi64 to i8* -; CHECK-DAG: [[DST:%[0-9]+]] = getelementptr i8, i8* %dst, i64 %src_size -; CHECK-DAG: [[ULE:%[0-9]+]] = icmp ule i64 %dst_size, %src_size -; CHECK-DAG: [[SIZEDIFF:%[0-9]+]] = sub i64 %dst_size, %src_size -; CHECK-DAG: [[SIZE:%[0-9]+]] = select i1 [[ULE]], i64 0, i64 [[SIZEDIFF]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST]], i8 %c, i64 [[SIZE]], i32 1, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_non_i8_dst_type(i8* %src, i64 %src_size, i64* %dst_pi64, i64 %dst_size, i8 %c) { +; CHECK-LABEL: @test_non_i8_dst_type( +; CHECK-NEXT: [[DST:%.*]] = bitcast i64* %dst_pi64 to i8* +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 %dst_size, %src_size +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 %dst_size, %src_size +; CHECK-NEXT:
[[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[DST]], i64 %src_size +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP4]], i8 %c, i64 [[TMP3]], i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* %src, i64 %src_size, i32 1, i1 false) +; CHECK-NEXT: ret void +; %dst = bitcast i64* %dst_pi64 to i8* call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_different_dst -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_different_dst(i8* %dst2, i8* %src, i64 %src_size, i8* %dst, i64 %dst_size) { +; CHECK-LABEL: @test_different_dst( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false) ret void @@ -128,12 +157,13 @@ ; Make sure we also take into account dependencies on the destination. 
-; CHECK-LABEL: define i8 @test_intermediate_read -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i32 1, i1 false) -; CHECK-NEXT: %r = load i8, i8* %a -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i32 1, i1 false) -; CHECK-NEXT: ret i8 %r define i8 @test_intermediate_read(i8* %a, i8* %b) #0 { +; CHECK-LABEL: @test_intermediate_read( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i32 1, i1 false) +; CHECK-NEXT: [[R:%.*]] = load i8, i8* %a +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i32 1, i1 false) +; CHECK-NEXT: ret i8 [[R]] +; call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i32 1, i1 false) %r = load i8, i8* %a call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i32 1, i1 false) @@ -142,15 +172,16 @@ %struct = type { [8 x i8], [8 x i8] } -; CHECK-LABEL: define void @test_intermediate_write -; CHECK-NEXT: %a = alloca %struct -; CHECK-NEXT: %a0 = getelementptr %struct, %struct* %a, i32 0, i32 0, i32 0 -; CHECK-NEXT: %a1 = getelementptr %struct, %struct* %a, i32 0, i32 1, i32 0 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %a0, i8 0, i64 16, i32 1, i1 false) -; CHECK-NEXT: store i8 1, i8* %a1 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a0, i8* %b, i64 8, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_intermediate_write(i8* %b) #0 { +; CHECK-LABEL: @test_intermediate_write( +; CHECK-NEXT: [[A:%.*]] = alloca %struct +; CHECK-NEXT: [[A0:%.*]] = getelementptr %struct, %struct* [[A]], i32 0, i32 0, i32 0 +; CHECK-NEXT: [[A1:%.*]] = getelementptr %struct, %struct* [[A]], i32 0, i32 1, i32 0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A0]], i8 0, i64 16, i32 1, i1 false) +; CHECK-NEXT: store i8 1, i8* [[A1]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[A0]], i8* %b, i64 8, i32 1, i1 false) +; CHECK-NEXT: ret void +; %a = alloca %struct %a0 = getelementptr %struct, %struct* %a, i32 0, i32 0, i32 0 %a1 = 
getelementptr %struct, %struct* %a, i32 0, i32 1, i32 0 Index: test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll =================================================================== --- test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll +++ test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll @@ -1,73 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -; CHECK-LABEL: define void @test( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 8, i1 false) -; CHECK-NEXT: ret void define void @test(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 8, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 8, i1 false) ret void } -; CHECK-LABEL: define void @test_smaller_memcpy( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_smaller_memcpy(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_smaller_memcpy( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_smaller_memset( -; 
CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i32 1, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_smaller_memset(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_smaller_memset( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_align_memset( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_align_memset(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_align_memset( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst2, i8 %c, i32 100, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_different_types(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_different_types( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst2, i8 %c, i32 100, i32 1, i1 false) +; CHECK-NEXT: ret 
void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %dst1, i32 100, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_different_types_2( -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i32 8, i1 false) -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_different_types_2(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_different_types_2( +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i32 8, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i32 8, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_different_source_gep( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) -; CHECK-NEXT: %p = getelementptr i8, i8* %dst1, i64 64 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %p, i64 64, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_different_source_gep(i8* %dst1, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_different_source_gep( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) +; CHECK-NEXT: [[P:%.*]] = getelementptr i8, i8* %dst1, i64 64 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* [[P]], i64 64, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) ; FIXME: We could optimize this as well. 
%p = getelementptr i8, i8* %dst1, i64 64 @@ -75,21 +83,23 @@ ret void } -; CHECK-LABEL: define void @test_variable_size_1( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i32 1, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_variable_size_1(i8* %dst1, i64 %dst1_size, i8* %dst2, i8 %c) { +; CHECK-LABEL: @test_variable_size_1( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false) ret void } -; CHECK-LABEL: define void @test_variable_size_2( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i32 1, i1 false) -; CHECK-NEXT: ret void define void @test_variable_size_2(i8* %dst1, i8* %dst2, i64 %dst2_size, i8 %c) { +; CHECK-LABEL: @test_variable_size_2( +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i32 1, i1 false) +; CHECK-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i32 1, i1 false) ret void Index: test/Transforms/MemCpyOpt/nontemporal.ll =================================================================== --- test/Transforms/MemCpyOpt/nontemporal.ll +++ test/Transforms/MemCpyOpt/nontemporal.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | 
FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -5,16 +6,25 @@ ; Verify that we don't combine nontemporal stores into memset calls. define void @nontemporal_stores_1(<4 x float>* nocapture %dst) { -; CHECK-LABEL: @nontemporal_stores_1 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr2, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr3, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr4, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr5, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr6, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr7, align 16, !nontemporal !0 -; CHECK-NEXT: ret void +; CHECK-LABEL: @nontemporal_stores_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR1]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 2 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR2]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 3 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR3]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 4 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR4]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR5:%.*]] = getelementptr inbounds <4 x float>, <4 
x float>* %dst, i64 5 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR5]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 6 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR6]], align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR7:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 7 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR7]], align 16, !nontemporal !0 +; CHECK-NEXT: ret void +; entry: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1 @@ -35,10 +45,13 @@ } define void @nontemporal_stores_2(<4 x float>* nocapture %dst) { -; CHECK-LABEL: @nontemporal_stores_2 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 -; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0 -; CHECK-NEXT: ret void +; CHECK-LABEL: @nontemporal_stores_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1 +; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR1]], align 16, !nontemporal !0 +; CHECK-NEXT: ret void +; entry: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1 Index: test/Transforms/MemCpyOpt/pr29105.ll =================================================================== --- test/Transforms/MemCpyOpt/pr29105.ll +++ test/Transforms/MemCpyOpt/pr29105.ll @@ -1,10 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -instcombine -S %s | FileCheck %s %Foo = type { [2048 x i64] } ; Make sure that all mempcy calls are converted to memset calls, or removed. 
-; CHECK-LABEL: @baz( -; CHECK-NOT: call void @llvm.memcpy define void @baz() unnamed_addr #0 { +; CHECK-LABEL: @baz( +; CHECK-NEXT: entry-block: +; CHECK-NEXT: [[TMP2:%.*]] = alloca %Foo, align 8 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %Foo* [[TMP2]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start(i64 16384, i8* [[TMP0]]) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP0]], i8 0, i64 16384, i32 8, i1 false) +; CHECK-NEXT: call void @bar(%Foo* noalias nocapture nonnull dereferenceable(16384) [[TMP2]]) +; CHECK-NEXT: call void @llvm.lifetime.end(i64 16384, i8* [[TMP0]]) +; CHECK-NEXT: ret void +; entry-block: %x.sroa.0 = alloca [2048 x i64], align 8 %tmp0 = alloca [2048 x i64], align 8 Index: test/Transforms/MemCpyOpt/profitable-memset.ll =================================================================== --- test/Transforms/MemCpyOpt/profitable-memset.ll +++ test/Transforms/MemCpyOpt/profitable-memset.ll @@ -1,12 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" -; CHECK-LABEL: @foo( -; CHECK-NOT: store -; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 8, i32 2, i1 false) - define void @foo(i64* nocapture %P) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* %P to i16* +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[ARRAYIDX]] to i32* +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 3 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP0]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP2]], i8 0, i64 8, i32 2, i1 false) +; CHECK-NEXT: ret void +; entry: %0 = bitcast i64* %P to i16* %arrayidx = getelementptr inbounds i16, i16* %0, i64 1 Index: test/Transforms/MemCpyOpt/smaller.ll =================================================================== --- 
test/Transforms/MemCpyOpt/smaller.ll +++ test/Transforms/MemCpyOpt/smaller.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S < %s | FileCheck %s ; RUN: opt -passes=memcpyopt -S < %s | FileCheck %s ; rdar://8875553 @@ -5,8 +6,6 @@ ; Memcpyopt shouldn't optimize the second memcpy using the first ; because the first has a smaller size. -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i32 4, i1 false) - target datalayout = "e-p:32:32:32" %struct.s = type { [11 x i8], i32 } @@ -18,6 +17,16 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind define void @foo() nounwind { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca %struct.s, align 4 +; CHECK-NEXT: store i32 99, i32* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 1), align 4 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 11, i32 1, i1 false) +; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds %struct.s, %struct.s* [[AGG_TMP]], i32 0, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP]], i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i32 4, i1 false) +; CHECK-NEXT: call void @check(%struct.s* byval [[AGG_TMP]]) +; CHECK-NEXT: ret void +; entry: %agg.tmp = alloca %struct.s, align 4 store i32 99, i32* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 1), align 4 Index: test/Transforms/MemCpyOpt/sret.ll =================================================================== --- test/Transforms/MemCpyOpt/sret.ll +++ test/Transforms/MemCpyOpt/sret.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -S | not grep "call.*memcpy" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" Index: test/Transforms/Util/combine-alias-scope-metadata.ll =================================================================== --- test/Transforms/Util/combine-alias-scope-metadata.ll +++ test/Transforms/Util/combine-alias-scope-metadata.ll @@ -1,10 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @test(i8* noalias dereferenceable(1) %in, i8* noalias dereferenceable(1) %out) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[TMP:%.*]] = alloca i8 +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 1, i32 8, i1 false) +; CHECK-NEXT: ret void +; %tmp = alloca i8 %tmp2 = alloca i8 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 1, i32 8, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %in, i64 1, i32 8, i1 false), !alias.scope !4 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp, i64 1, i32 8, i1 false), !alias.scope !5