diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-02-01-ReturnAttrs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-02-01-ReturnAttrs.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s + +define internal i32 @deref(i32* %x) nounwind { +; CHECK-LABEL: define {{[^@]+}}@deref +; CHECK-SAME: (i32 [[X_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[X_VAL]] +; +entry: + %tmp2 = load i32, i32* %x, align 4 + ret i32 %tmp2 +} + +define i32 @f(i32 %x) { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[X:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 +; CHECK-NEXT: [[X_ADDR_VAL:%.*]] = load i32, i32* [[X_ADDR]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]]) +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %x_addr = alloca i32 + store i32 %x, i32* %x_addr, align 4 + %tmp1 = call i32 @deref( i32* %x_addr ) nounwind + ret i32 %tmp1 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-07-02-array-indexing.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-07-02-array-indexing.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-07-02-array-indexing.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; PR2498 + +; This test tries to convince argpromotion about promoting the load from %A + 2, +; because there is a load of %A in the entry block. +define internal i32 @callee(i1 %C, i32* %A) { +; 
CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (i1 [[C:%.*]], i32* [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_0:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i32 [[A_0]] +; CHECK: F: +; CHECK-NEXT: [[A_2:%.*]] = getelementptr i32, i32* [[A]], i32 2 +; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[A_2]] +; CHECK-NEXT: ret i32 [[R]] +; +entry: + ; Unconditionally load the element at %A + %A.0 = load i32, i32* %A + br i1 %C, label %T, label %F + +T: + ret i32 %A.0 + +F: + ; Load the element at offset two from %A. This should not be promoted! + %A.2 = getelementptr i32, i32* %A, i32 2 + %R = load i32, i32* %A.2 + ret i32 %R +} + +define i32 @foo() { +; CHECK-LABEL: define {{[^@]+}}@foo() +; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32* null) +; CHECK-NEXT: ret i32 [[X]] +; + %X = call i32 @callee(i1 false, i32* null) ; [#uses=1] + ret i32 %X +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-09-07-CGUpdate.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-09-07-CGUpdate.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-09-07-CGUpdate.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -inline -argpromotion -maxar=0 -disable-output + +define internal fastcc i32 @hash(i32* %ts, i32 %mod) nounwind { +entry: + unreachable +} + +define void @encode(i32* %m, i32* %ts, i32* %new) nounwind { +entry: + %0 = call fastcc i32 @hash( i32* %ts, i32 0 ) nounwind ; [#uses=0] + unreachable +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-09-08-CGUpdateSelfEdge.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-09-08-CGUpdateSelfEdge.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/2008-09-08-CGUpdateSelfEdge.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -disable-output + +define internal fastcc i32 @term_SharingList(i32* %Term, i32* %List) nounwind { +entry: + br i1 false, label %bb, label %bb5 + +bb: ; preds = %entry + %0 = call fastcc i32 @term_SharingList( i32* null, i32* %List ) nounwind ; [#uses=0] + unreachable + +bb5: ; preds = %entry + ret i32 0 +} + +define i32 @term_Sharing(i32* %Term) nounwind { +entry: + br i1 false, label %bb.i, label %bb14 + +bb.i: ; preds = %entry + %0 = call fastcc i32 @term_SharingList( i32* null, i32* null ) nounwind ; [#uses=0] + ret i32 1 + +bb14: ; preds = %entry + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/aggregate-promote.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/aggregate-promote.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/aggregate-promote.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s + +%T = type { i32, i32, i32, i32 } +@G = constant %T { i32 0, i32 0, i32 17, i32 25 } + +define internal i32 @test(%T* %p) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32 [[P_0_2_VAL:%.*]], i32 [[P_0_3_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = add i32 [[P_0_3_VAL]], [[P_0_2_VAL]] +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 + %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 + %a = load i32, i32* %a.gep + %b = load i32, i32* %b.gep + %v = add i32 %a, %b + ret i32 %v +} + +define i32 @caller() { +; 
CHECK-LABEL: define {{[^@]+}}@caller() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G_IDX:%.*]] = getelementptr [[T:%.*]], %T* @G, i64 0, i32 2 +; CHECK-NEXT: [[G_IDX_VAL:%.*]] = load i32, i32* [[G_IDX]] +; CHECK-NEXT: [[G_IDX1:%.*]] = getelementptr [[T]], %T* @G, i64 0, i32 3 +; CHECK-NEXT: [[G_IDX1_VAL:%.*]] = load i32, i32* [[G_IDX1]] +; CHECK-NEXT: [[V:%.*]] = call i32 @test(i32 [[G_IDX_VAL]], i32 [[G_IDX1_VAL]]) +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %v = call i32 @test(%T* @G) + ret i32 %v +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/attributes.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/attributes.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/attributes.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S -argpromotion -maxar=0 < %s | FileCheck %s +; RUN: opt -S -passes=argpromotion -maxar=0 < %s | FileCheck %s +; Test that we only promote arguments when the caller/callee have compatible +; function attributes. 
+ +target triple = "x86_64-unknown-linux-gnu" + +define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { +; CHECK-LABEL: define {{[^@]+}}@no_promote_avx2 +; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64>* readonly [[ARG1:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]] +; CHECK-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <4 x i64>, <4 x i64>* %arg1 + store <4 x i64> %tmp, <4 x i64>* %arg + ret void +} + +define void @no_promote(<4 x i64>* %arg) #1 { +; CHECK-LABEL: define {{[^@]+}}@no_promote +; CHECK-SAME: (<4 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* [[TMP2]], <4 x i64>* [[TMP]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <4 x i64>, align 32 + %tmp2 = alloca <4 x i64>, align 32 + %tmp3 = bitcast <4 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @no_promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp) + %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32 + store <4 x i64> %tmp4, <4 x i64>* %arg, align 2 + ret void +} + +define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { +; CHECK-LABEL: define {{[^@]+}}@promote_avx2 +; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <4 x i64> [[ARG1_VAL]], <4 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <4 x i64>, <4 x i64>* %arg1 + store <4 x i64> %tmp, <4 
x i64>* %arg + ret void +} + +define void @promote(<4 x i64>* %arg) #0 { +; CHECK-LABEL: define {{[^@]+}}@promote +; CHECK-SAME: (<4 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @promote_avx2(<4 x i64>* [[TMP2]], <4 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <4 x i64>, align 32 + %tmp2 = alloca <4 x i64>, align 32 + %tmp3 = bitcast <4 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp) + %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32 + store <4 x i64> %tmp4, <4 x i64>* %arg, align 2 + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #2 + +attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" } +attributes #1 = { nounwind uwtable } +attributes #2 = { argmemonly nounwind } diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/attrs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/attrs.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s + +%struct.ss = type { i32, i64 
} + +; Don't drop 'byval' on %X here. +define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]], i32 [[I:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; CHECK-NEXT: store i32 0, i32* [[X]], align 4 +; CHECK-NEXT: ret void +; +entry: + + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = add i32 %tmp1, 1 + store i32 %tmp2, i32* %tmp, align 4 + + store i32 0, i32* %X + ret void +} + +; Also make sure we don't drop the call zeroext attribute. 
+define i32 @test(i32* %X) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32* [[X:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 +; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 +; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X]], i32 zeroext 0) +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca %struct.ss + %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + store i32 1, i32* %tmp1, align 8 + %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i64 2, i64* %tmp4, align 4 + + call void @f( %struct.ss* byval %S, i32* byval %X, i32 zeroext 0) + + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/basictest.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/basictest.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/basictest.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -basicaa -argpromotion -maxar=0 -mem2reg -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +define internal i32 @test(i32* %X, i32* %Y) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]]) +; CHECK-NEXT: 
[[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]] +; CHECK-NEXT: ret i32 [[C]] +; + %A = load i32, i32* %X + %B = load i32, i32* %Y + %C = add i32 %A, %B + ret i32 %C +} + +define internal i32 @caller(i32* %B) { +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (i32 [[B_VAL1:%.*]]) +; CHECK-NEXT: [[C:%.*]] = call i32 @test(i32 1, i32 [[B_VAL1]]) +; CHECK-NEXT: ret i32 [[C]] +; + %A = alloca i32 + store i32 1, i32* %A + %C = call i32 @test(i32* %A, i32* %B) + ret i32 %C +} + +define i32 @callercaller() { +; CHECK-LABEL: define {{[^@]+}}@callercaller() +; CHECK-NEXT: [[X:%.*]] = call i32 @caller(i32 2) +; CHECK-NEXT: ret i32 [[X]] +; + %B = alloca i32 + store i32 2, i32* %B + %X = call i32 @caller(i32* %B) + ret i32 %X +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/byval-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/byval-2.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s + +; Arg promotion eliminates the struct argument. +; FIXME: We should eliminate the i32* argument. 
+ +%struct.ss = type { i32, i64 } + +define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; CHECK-NEXT: store i32 0, i32* [[X]], align 4 +; CHECK-NEXT: ret void +; +entry: + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = add i32 %tmp1, 1 + store i32 %tmp2, i32* %tmp, align 4 + + store i32 0, i32* %X + ret void +} + +define i32 @test(i32* %X) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32* [[X:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 +; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 +; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval 
[[X]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca %struct.ss + %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + store i32 1, i32* %tmp1, align 8 + %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i64 2, i64* %tmp4, align 4 + call void @f( %struct.ss* byval %S, i32* byval %X) + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/byval.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/byval.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/byval.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s + +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +%struct.ss = type { i32, i64 } + +define internal void @f(%struct.ss* byval %b) nounwind { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; CHECK-NEXT: ret void +; +entry: + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = add i32 %tmp1, 1 + store i32 %tmp2, 
i32* %tmp, align 4 + ret void +} + + +define internal void @g(%struct.ss* byval align 32 %b) nounwind { +; CHECK-LABEL: define {{[^@]+}}@g +; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; CHECK-NEXT: ret void +; +entry: + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = add i32 %tmp1, 1 + store i32 %tmp2, i32* %tmp, align 4 + ret void +} + + +define i32 @main() nounwind { +; CHECK-LABEL: define {{[^@]+}}@main() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 +; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 +; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]]) +; CHECK-NEXT: [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_01_VAL:%.*]] = load i32, i32* 
[[S_01]], align 4 +; CHECK-NEXT: [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_12_VAL:%.*]] = load i64, i64* [[S_12]], align 4 +; CHECK-NEXT: call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca %struct.ss + %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + store i32 1, i32* %tmp1, align 8 + %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i64 2, i64* %tmp4, align 4 + call void @f(%struct.ss* byval %S) nounwind + call void @g(%struct.ss* byval %S) nounwind + ret i32 0 +} + + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/chained.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/chained.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/chained.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s + +@G1 = constant i32 0 +@G2 = constant i32* @G1 + +define internal i32 @test(i32** %x) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[X_VAL_VAL]] +; +entry: + %y = load i32*, i32** %x + %z = load i32, i32* %y + ret i32 %z +} + +define i32 @caller() { +; CHECK-LABEL: define {{[^@]+}}@caller() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G2_VAL:%.*]] = load i32*, i32** @G2 +; CHECK-NEXT: [[G2_VAL_VAL:%.*]] = load i32, i32* [[G2_VAL]] +; CHECK-NEXT: [[X:%.*]] = call i32 @test(i32 [[G2_VAL_VAL]]) +; CHECK-NEXT: ret i32 [[X]] +; +entry: + %x = call i32 @test(i32** @G2) + ret i32 %x +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/control-flow.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/control-flow.ll new file mode 100644 
--- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/control-flow.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s + +; Don't promote around control flow. +define internal i32 @callee(i1 %C, i32* %P) { +; CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (i1 [[C:%.*]], i32* [[P:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i32 17 +; CHECK: F: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: ret i32 [[X]] +; +entry: + br i1 %C, label %T, label %F + +T: + ret i32 17 + +F: + %X = load i32, i32* %P + ret i32 %X +} + +define i32 @foo() { +; CHECK-LABEL: define {{[^@]+}}@foo() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 true, i32* null) +; CHECK-NEXT: ret i32 [[X]] +; +entry: + %X = call i32 @callee(i1 true, i32* null) + ret i32 %X +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/control-flow2.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/control-flow2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/control-flow2.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s + +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +define internal i32 @callee(i1 %C, i32* %P) { +; CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (i1 [[C:%.*]], i32 [[P_VAL:%.*]]) +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: 
+; CHECK-NEXT: ret i32 17 +; CHECK: F: +; CHECK-NEXT: ret i32 [[P_VAL]] +; + br i1 %C, label %T, label %F + +T: ; preds = %0 + ret i32 17 + +F: ; preds = %0 + %X = load i32, i32* %P ; [#uses=1] + ret i32 %X +} + +define i32 @foo() { +; CHECK-LABEL: define {{[^@]+}}@foo() +; CHECK-NEXT: [[A:%.*]] = alloca i32 +; CHECK-NEXT: store i32 17, i32* [[A]] +; CHECK-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32 [[A_VAL]]) +; CHECK-NEXT: ret i32 [[X]] +; + %A = alloca i32 ; [#uses=2] + store i32 17, i32* %A + %X = call i32 @callee( i1 false, i32* %A ) ; [#uses=1] + ret i32 %X +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/crash.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/crash.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/crash.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S < %s -inline -argpromotion -maxar=0 | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_OLDPM +; RUN: opt -S < %s -passes=inline,argpromotion -maxar=0 | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_NEWPM + +%S = type { %S* } + +; Inlining should nuke the invoke (and any inlined calls) here even with +; argument promotion running along with it. 
+define void @zot() personality i32 (...)* @wibble { +; ARGPROMOTION-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: unreachable +; ARGPROMOTION: hoge.exit: +; ARGPROMOTION-NEXT: br label [[BB1:%.*]] +; ARGPROMOTION: bb1: +; ARGPROMOTION-NEXT: unreachable +; ARGPROMOTION: bb2: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } +; ARGPROMOTION-NEXT: cleanup +; ARGPROMOTION-NEXT: unreachable +; +bb: + invoke void @hoge() + to label %bb1 unwind label %bb2 + +bb1: + unreachable + +bb2: + %tmp = landingpad { i8*, i32 } + cleanup + unreachable +} + +define internal void @hoge() { +bb: + %tmp = call fastcc i8* @spam(i1 (i8*)* @eggs) + %tmp1 = call fastcc i8* @spam(i1 (i8*)* @barney) + unreachable +} + +define internal fastcc i8* @spam(i1 (i8*)* %arg) { +bb: + unreachable +} + +define internal i1 @eggs(i8* %arg) { +; ALL_NEWPM-LABEL: define {{[^@]+}}@eggs() +; ALL_NEWPM-NEXT: bb: +; ALL_NEWPM-NEXT: unreachable +; +bb: + %tmp = call zeroext i1 @barney(i8* %arg) + unreachable +} + +define internal i1 @barney(i8* %arg) { +bb: + ret i1 undef +} + +define i32 @test_inf_promote_caller(i32 %arg) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_caller +; ARGPROMOTION-SAME: (i32 [[ARG:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] +; ARGPROMOTION-NEXT: [[TMP1:%.*]] = alloca [[S]] +; ARGPROMOTION-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) +; ARGPROMOTION-NEXT: ret i32 0 +; +bb: + %tmp = alloca %S + %tmp1 = alloca %S + %tmp2 = call i32 @test_inf_promote_callee(%S* %tmp, %S* %tmp1) + + ret i32 0 +} + +define internal i32 @test_inf_promote_callee(%S* %arg, %S* %arg1) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_callee +; ARGPROMOTION-SAME: (%S* [[ARG:%.*]], %S* [[ARG1:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0 +; ARGPROMOTION-NEXT: 
[[TMP2:%.*]] = load %S*, %S** [[TMP]] +; ARGPROMOTION-NEXT: [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0 +; ARGPROMOTION-NEXT: [[TMP4:%.*]] = load %S*, %S** [[TMP3]] +; ARGPROMOTION-NEXT: [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]]) +; ARGPROMOTION-NEXT: ret i32 0 +; +bb: + %tmp = getelementptr %S, %S* %arg1, i32 0, i32 0 + %tmp2 = load %S*, %S** %tmp + %tmp3 = getelementptr %S, %S* %arg, i32 0, i32 0 + %tmp4 = load %S*, %S** %tmp3 + %tmp5 = call i32 @test_inf_promote_callee(%S* %tmp4, %S* %tmp2) + + ret i32 0 +} + +declare i32 @wibble(...) diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/dbg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/dbg.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s + +declare void @sink(i32) + +define internal void @test(i32** %X) !dbg !2 { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]]) !dbg !3 +; CHECK-NEXT: call void @sink(i32 [[X_VAL_VAL]]) +; CHECK-NEXT: ret void +; + %1 = load i32*, i32** %X, align 8 + %2 = load i32, i32* %1, align 8 + call void @sink(i32 %2) + ret void +} + +%struct.pair = type { i32, i32 } + +define internal void @test_byval(%struct.pair* byval %P) { +; CHECK-LABEL: define {{[^@]+}}@test_byval +; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) +; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 8 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[P_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1 +; CHECK-NEXT: store i32 
[[P_1]], i32* [[DOT1]], align 4 +; CHECK-NEXT: ret void +; + ret void +} + +define void @caller(i32** %Y, %struct.pair* %P) { +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (i32** [[Y:%.*]], %struct.pair* [[P:%.*]]) +; CHECK-NEXT: [[Y_VAL:%.*]] = load i32*, i32** [[Y]], align 8, !dbg !4 +; CHECK-NEXT: [[Y_VAL_VAL:%.*]] = load i32, i32* [[Y_VAL]], align 8, !dbg !4 +; CHECK-NEXT: call void @test(i32 [[Y_VAL_VAL]]), !dbg !4 +; CHECK-NEXT: [[P_0:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 0, !dbg !5 +; CHECK-NEXT: [[P_0_VAL:%.*]] = load i32, i32* [[P_0]], align 4, !dbg !5 +; CHECK-NEXT: [[P_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1, !dbg !5 +; CHECK-NEXT: [[P_1_VAL:%.*]] = load i32, i32* [[P_1]], align 4, !dbg !5 +; CHECK-NEXT: call void @test_byval(i32 [[P_0_VAL]], i32 [[P_1_VAL]]), !dbg !5 +; CHECK-NEXT: ret void +; + call void @test(i32** %Y), !dbg !1 + + call void @test_byval(%struct.pair* %P), !dbg !6 + ret void +} + + +!llvm.module.flags = !{!0} +!llvm.dbg.cu = !{!3} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !DILocation(line: 8, scope: !2) +!2 = distinct !DISubprogram(name: "test", file: !5, line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, scopeLine: 3, scope: null) +!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: LineTablesOnly, file: !5) +!5 = !DIFile(filename: "test.c", directory: "") +!6 = !DILocation(line: 9, scope: !2) diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/fp80.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/fp80.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s 
-argpromotion -maxar=0 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%union.u = type { x86_fp80 } +%struct.s = type { double, i16, i8, [5 x i8] } + +@b = internal global %struct.s { double 3.14, i16 9439, i8 25, [5 x i8] undef }, align 16 + +%struct.Foo = type { i32, i64 } +@a = internal global %struct.Foo { i32 1, i64 2 }, align 8 + +define void @run() { +; CHECK-LABEL: define {{[^@]+}}@run() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0 +; CHECK-NEXT: [[DOT0_VAL:%.*]] = load x86_fp80, x86_fp80* [[DOT0]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* @a) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* @a) +; CHECK-NEXT: ret void +; +entry: + tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) + tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) + call i64 @AccessPaddingOfStruct(%struct.Foo* @a) + call i64 @CaptureAStruct(%struct.Foo* @a) + ret void +} + +define internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) { +; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely +; CHECK-SAME: (%union.u* byval align 16 [[ARG:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast %union.u* [[ARG]] to %struct.s* +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.s* [[BITCAST]], i64 0, i32 2 +; CHECK-NEXT: [[RESULT:%.*]] = load i8, i8* [[GEP]] +; CHECK-NEXT: ret i8 [[RESULT]] +; +entry: + %bitcast = bitcast %union.u* 
%arg to %struct.s* + %gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2 + %result = load i8, i8* %gep + ret i8 %result +} + +define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %arg) { +; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafely +; CHECK-SAME: (x86_fp80 [[ARG_0:%.*]]) +; CHECK-NEXT: [[ARG:%.*]] = alloca [[UNION_U:%.*]], align 16 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U]], %union.u* [[ARG]], i32 0, i32 0 +; CHECK-NEXT: store x86_fp80 [[ARG_0]], x86_fp80* [[DOT0]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], %union.u* [[ARG]], i64 0, i32 0 +; CHECK-NEXT: [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]] +; CHECK-NEXT: ret x86_fp80 [[FP80]] +; + %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0 + %fp80 = load x86_fp80, x86_fp80* %gep + ret x86_fp80 %fp80 +} + +define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) { +; CHECK-LABEL: define {{[^@]+}}@AccessPaddingOfStruct +; CHECK-SAME: (%struct.Foo* byval [[A:%.*]]) +; CHECK-NEXT: [[P:%.*]] = bitcast %struct.Foo* [[A]] to i64* +; CHECK-NEXT: [[V:%.*]] = load i64, i64* [[P]] +; CHECK-NEXT: ret i64 [[V]] +; + %p = bitcast %struct.Foo* %a to i64* + %v = load i64, i64* %p + ret i64 %v +} + +define internal i64 @CaptureAStruct(%struct.Foo* byval %a) { +; CHECK-LABEL: define {{[^@]+}}@CaptureAStruct +; CHECK-SAME: (%struct.Foo* byval [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_PTR:%.*]] = alloca %struct.Foo* +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi %struct.Foo* [ [[A]], [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ] +; CHECK-NEXT: store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]] +; CHECK-NEXT: [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0 +; CHECK-NEXT: br label [[LOOP]] +; +entry: + %a_ptr = alloca %struct.Foo* + br label %loop + +loop: + %phi = phi 
%struct.Foo* [ null, %entry ], [ %gep, %loop ] + %0 = phi %struct.Foo* [ %a, %entry ], [ %0, %loop ] + store %struct.Foo* %phi, %struct.Foo** %a_ptr + %gep = getelementptr %struct.Foo, %struct.Foo* %a, i64 0 + br label %loop +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/inalloca.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/inalloca.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/inalloca.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt %s -globalopt -argpromotion -maxar=0 -sroa -S | FileCheck %s +; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -maxar=0 -S | FileCheck %s + +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +%struct.ss = type { i32, i32 } + +; Argpromote + sroa should change this to passing the two integers by value. 
+define internal i32 @f(%struct.ss* inalloca %s) { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[S_0_0_VAL:%.*]], i32 [[S_0_1_VAL:%.*]]) unnamed_addr +; CHECK-NEXT: entry: +; CHECK-NEXT: [[R:%.*]] = add i32 [[S_0_0_VAL]], [[S_0_1_VAL]] +; CHECK-NEXT: ret i32 [[R]] +; +entry: + %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0 + %f1 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 1 + %a = load i32, i32* %f0, align 4 + %b = load i32, i32* %f1, align 4 + %r = add i32 %a, %b + ret i32 %r +} + +define i32 @main() { +; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr +; CHECK-NEXT: entry: +; CHECK-NEXT: [[R:%.*]] = call fastcc i32 @f(i32 1, i32 2) +; CHECK-NEXT: ret i32 [[R]] +; +entry: + %S = alloca inalloca %struct.ss + %f0 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + %f1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i32 1, i32* %f0, align 4 + store i32 2, i32* %f1, align 4 + %r = call i32 @f(%struct.ss* inalloca %S) + ret i32 %r +} + +; Argpromote can't promote %a because of the icmp use. 
+define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind { +; CHECK-LABEL: define {{[^@]+}}@g +; CHECK-SAME: (%struct.ss* [[A:%.*]], %struct.ss* [[B:%.*]]) unnamed_addr +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = icmp eq %struct.ss* [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; +entry: + %c = icmp eq %struct.ss* %a, %b + ret i1 %c +} + +define i32 @test() { +; CHECK-LABEL: define {{[^@]+}}@test() local_unnamed_addr +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca inalloca [[STRUCT_SS:%.*]] +; CHECK-NEXT: [[C:%.*]] = call fastcc i1 @g(%struct.ss* [[S]], %struct.ss* [[S]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca inalloca %struct.ss + %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/invalidation.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/invalidation.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/invalidation.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; Check that when argument promotion changes a function in some parent node of +; the call graph, any analyses that happened to be cached for that function are +; actually invalidated. We are using `demanded-bits` here because when printed +; it will end up caching a value for every instruction, making it easy to +; detect the instruction-level changes that will fail here. With improper +; invalidation this will crash in the second printer as it tries to reuse +; now-invalid demanded bits. 
+; +; RUN: opt < %s -passes='function(print),cgscc(argpromotion,function(print))' -maxar=0 -S | FileCheck %s + +@G = constant i32 0 + +define internal i32 @a(i32* %x) { +; CHECK-LABEL: define {{[^@]+}}@a +; CHECK-SAME: (i32 [[X_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[X_VAL]] +; +entry: + %v = load i32, i32* %x + ret i32 %v +} + +define i32 @b() { +; CHECK-LABEL: define {{[^@]+}}@b() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G_VAL:%.*]] = load i32, i32* @G +; CHECK-NEXT: [[V:%.*]] = call i32 @a(i32 [[G_VAL]]) +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %v = call i32 @a(i32* @G) + ret i32 %v +} + +define i32 @c() { +; CHECK-LABEL: define {{[^@]+}}@c() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G_VAL:%.*]] = load i32, i32* @G +; CHECK-NEXT: [[V1:%.*]] = call i32 @a(i32 [[G_VAL]]) +; CHECK-NEXT: [[V2:%.*]] = call i32 @b() +; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[V1]], [[V2]] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %v1 = call i32 @a(i32* @G) + %v2 = call i32 @b() + %result = add i32 %v1, %v2 + ret i32 %result +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/lit.local.cfg b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/lit.local.cfg new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'X86' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/min-legal-vector-width.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/min-legal-vector-width.ll @@ -0,0 +1,387 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S -argpromotion -maxar=0 < %s | FileCheck %s +; RUN: opt -S -passes=argpromotion -maxar=0 < %s | FileCheck %s +; Test that we 
only promote arguments when the caller/callee have compatible +; function attributes. + +target triple = "x86_64-unknown-linux-gnu" + +; This should promote +define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret
void +} + +; This should promote +define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should promote +define internal fastcc void 
@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should promote +define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { +; 
CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should not promote +define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* 
[[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]] +; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should not promote +define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* 
[[ARG1]] +; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should promote +define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define 
void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 { +; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should promote +define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 { +; CHECK-LABEL: define 
{{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; If the arguments are scalar, it's ok to promote.
+define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %X, i32* %Y) #2 { +; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]]) +; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]] +; CHECK-NEXT: ret i32 [[C]] +; + %A = load i32, i32* %X + %B = load i32, i32* %Y + %C = add i32 %A, %B + ret i32 %C +} + +define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %B) #2 { +; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (i32* [[B:%.*]]) +; CHECK-NEXT: [[A:%.*]] = alloca i32 +; CHECK-NEXT: store i32 1, i32* [[A]] +; CHECK-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[B_VAL:%.*]] = load i32, i32* [[B]] +; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]]) +; CHECK-NEXT: ret i32 [[C]] +; + %A = alloca i32 + store i32 1, i32* %A + %C = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %A, i32* %B) + ret i32 %C +} + +; If the arguments are scalar, it's ok to promote.
+define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %X, i32* %Y) #2 { +; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]]) +; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]] +; CHECK-NEXT: ret i32 [[C]] +; + %A = load i32, i32* %X + %B = load i32, i32* %Y + %C = add i32 %A, %B + ret i32 %C +} + +define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %B) #2 { +; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CHECK-SAME: (i32* [[B:%.*]]) +; CHECK-NEXT: [[A:%.*]] = alloca i32 +; CHECK-NEXT: store i32 1, i32* [[A]] +; CHECK-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[B_VAL:%.*]] = load i32, i32* [[B]] +; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]]) +; CHECK-NEXT: ret i32 [[C]] +; + %A = alloca i32 + store i32 1, i32* %A + %C = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %A, i32* %B) + ret i32 %C +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5 + +attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" } +attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" } +attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" } +attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" } +attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" 
"min-legal-vector-width"="256" "prefer-vector-width"="256" } +attributes #5 = { argmemonly nounwind } diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/musttail.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/musttail.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/musttail.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; PR36543 + +; Don't promote arguments of musttail callee + +%T = type { i32, i32, i32, i32 } + +define internal i32 @test(%T* %p) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (%T* [[P:%.*]]) +; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3 +; CHECK-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2 +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]] +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]] +; CHECK-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: ret i32 [[V]] +; + %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 + %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 + %a = load i32, i32* %a.gep + %b = load i32, i32* %b.gep + %v = add i32 %a, %b + ret i32 %v +} + +define i32 @caller(%T* %p) { +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (%T* [[P:%.*]]) +; CHECK-NEXT: [[V:%.*]] = musttail call i32 @test(%T* [[P]]) +; CHECK-NEXT: ret i32 [[V]] +; + %v = musttail call i32 @test(%T* %p) + ret i32 %v +} + +; Don't promote arguments of musttail caller + +define i32 @foo(%T* %p, i32 %v) { +; CHECK-LABEL: define {{[^@]+}}@foo +; CHECK-SAME: (%T* [[P:%.*]], i32 [[V:%.*]]) +; CHECK-NEXT: ret i32 0 +; + ret i32 0 +} + +define internal i32 @test2(%T* %p, i32 %p2) { +; CHECK-LABEL: define {{[^@]+}}@test2 +; CHECK-SAME: (%T* [[P:%.*]], i32 [[P2:%.*]]) +; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3 +; 
CHECK-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2 +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]] +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]] +; CHECK-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: [[CA:%.*]] = musttail call i32 @foo(%T* undef, i32 [[V]]) +; CHECK-NEXT: ret i32 [[CA]] +; + %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 + %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 + %a = load i32, i32* %a.gep + %b = load i32, i32* %b.gep + %v = add i32 %a, %b + %ca = musttail call i32 @foo(%T* undef, i32 %v) + ret i32 %ca +} + +define i32 @caller2(%T* %g) { +; CHECK-LABEL: define {{[^@]+}}@caller2 +; CHECK-SAME: (%T* [[G:%.*]]) +; CHECK-NEXT: [[V:%.*]] = call i32 @test2(%T* [[G]], i32 0) +; CHECK-NEXT: ret i32 [[V]] +; + %v = call i32 @test2(%T* %g, i32 0) + ret i32 %v +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/naked_functions.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/naked_functions.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/naked_functions.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s + +; Don't promote parameters of/arguments to naked functions + +@g = common global i32 0, align 4 + +define i32 @bar() { +; CHECK-LABEL: define {{[^@]+}}@bar() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo(i32* @g) +; CHECK-NEXT: ret i32 [[CALL]] +; +entry: + %call = call i32 @foo(i32* @g) + ret i32 %call +} + +define internal i32 @foo(i32*) #0 { +; CHECK-LABEL: define {{[^@]+}}@foo +; CHECK-SAME: (i32* [[TMP0:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() +; CHECK-NEXT: unreachable +; +entry: + %retval = alloca i32, align 4 + call void asm
sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() + unreachable +} + + +attributes #0 = { naked } diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/nonzero-address-spaces.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/nonzero-address-spaces.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/nonzero-address-spaces.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s + +; ArgumentPromotion should preserve the default function address space +; from the data layout. + +target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8" + +@g = common global i32 0, align 4 + +define i32 @bar() { +; CHECK-LABEL: define {{[^@]+}}@bar() addrspace(1) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call addrspace(1) i32 @foo() +; CHECK-NEXT: ret i32 [[CALL]] +; + +entry: + %call = call i32 @foo(i32* @g) + ret i32 %call +} + +define internal i32 @foo(i32*) { +; CHECK-LABEL: define {{[^@]+}}@foo() addrspace(1) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() +; CHECK-NEXT: unreachable +; +entry: + %retval = alloca i32, align 4 + call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() + unreachable +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr27568.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr27568.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr27568.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S -argpromotion -maxar=0 < %s | FileCheck %s +; RUN: opt -S -passes=argpromotion -maxar=0 < %s | 
FileCheck %s +; RUN: opt -S -maxar=0 -debugify -o /dev/null < %s +target triple = "x86_64-pc-windows-msvc" + +define internal void @callee(i8*) { +; CHECK-LABEL: define {{[^@]+}}@callee() +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @thunk() +; CHECK-NEXT: ret void +; +entry: + call void @thunk() + ret void +} + +define void @test1() personality i32 (...)* @__CxxFrameHandler3 { +; CHECK-LABEL: define {{[^@]+}}@test1() personality i32 (...)* @__CxxFrameHandler3 +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @thunk() +; CHECK-NEXT: to label [[OUT:%.*]] unwind label [[CPAD:%.*]] +; CHECK: out: +; CHECK-NEXT: ret void +; CHECK: cpad: +; CHECK-NEXT: [[PAD:%.*]] = cleanuppad within none [] +; CHECK-NEXT: call void @callee() [ "funclet"(token [[PAD]]) ] +; CHECK-NEXT: cleanupret from [[PAD]] unwind to caller +; +entry: + invoke void @thunk() + to label %out unwind label %cpad + +out: + ret void + +cpad: + %pad = cleanuppad within none [] + call void @callee(i8* null) [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +} + + +declare void @thunk() + +declare i32 @__CxxFrameHandler3(...) 
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr32917.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr32917.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr32917.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; PR 32917 + +@b = common local_unnamed_addr global i32 0, align 4 +@a = common local_unnamed_addr global i32 0, align 4 + +define i32 @fn2() local_unnamed_addr { +; CHECK-LABEL: define {{[^@]+}}@fn2() local_unnamed_addr +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32* +; CHECK-NEXT: [[DOTIDX:%.*]] = getelementptr i32, i32* [[TMP3]], i64 -1 +; CHECK-NEXT: [[DOTIDX_VAL:%.*]] = load i32, i32* [[DOTIDX]], align 4 +; CHECK-NEXT: call fastcc void @fn1(i32 [[DOTIDX_VAL]]) +; CHECK-NEXT: ret i32 undef +; + %1 = load i32, i32* @b, align 4 + %2 = sext i32 %1 to i64 + %3 = inttoptr i64 %2 to i32* + call fastcc void @fn1(i32* %3) + ret i32 undef +} + +define internal fastcc void @fn1(i32* nocapture readonly) unnamed_addr { +; CHECK-LABEL: define {{[^@]+}}@fn1 +; CHECK-SAME: (i32 [[DOT18446744073709551615_VAL:%.*]]) unnamed_addr +; CHECK-NEXT: store i32 [[DOT18446744073709551615_VAL]], i32* @a, align 4 +; CHECK-NEXT: ret void +; + %2 = getelementptr inbounds i32, i32* %0, i64 -1 + %3 = load i32, i32* %2, align 4 + store i32 %3, i32* @a, align 4 + ret void +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr33641_remove_arg_dbgvalue.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/pr33641_remove_arg_dbgvalue.ll @@ -0,0 +1,42 
@@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -argpromotion -maxar=0 -verify -dse -S %s -o - | FileCheck %s + +; Fix for PR33641. ArgumentPromotion removed the argument to bar but left the call to +; dbg.value which still used the removed argument. + +; The %p argument should be removed, and the use of it in dbg.value should be +; changed to undef. + +%p_t = type i16* +%fun_t = type void (%p_t)* + +define void @foo() { +; CHECK-LABEL: define {{[^@]+}}@foo() +; CHECK-NEXT: ret void +; + %tmp = alloca %fun_t + store %fun_t @bar, %fun_t* %tmp + ret void +} + +define internal void @bar(%p_t %p) { +; CHECK-LABEL: define {{[^@]+}}@bar() +; CHECK-NEXT: call void @llvm.dbg.value(metadata i16* undef, metadata !3, metadata !DIExpression()), !dbg !5 +; CHECK-NEXT: ret void +; + call void @llvm.dbg.value(metadata %p_t %p, metadata !4, metadata !5), !dbg !6 + ret void +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1) +!1 = !DIFile(filename: "test.c", directory: "") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "bar", unit: !0) +!4 = !DILocalVariable(name: "p", scope: !3) +!5 = !DIExpression() +!6 = !DILocation(line: 1, column: 1, scope: !3) diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/profile.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/profile.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/profile.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -argpromotion -maxar=0 -mem2reg -S < %s | FileCheck %s +target datalayout =
"E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +; Checks if !prof metadata is correct in deadargelim. + +define void @caller() #0 { +; CHECK-LABEL: define {{[^@]+}}@caller() +; CHECK-NEXT: call void @promote_i32_ptr(i32 42), !prof !0 +; CHECK-NEXT: ret void +; + %x = alloca i32 + store i32 42, i32* %x + call void @promote_i32_ptr(i32* %x), !prof !0 + ret void +} + +define internal void @promote_i32_ptr(i32* %xp) { +; CHECK-LABEL: define {{[^@]+}}@promote_i32_ptr +; CHECK-SAME: (i32 [[XP_VAL:%.*]]) +; CHECK-NEXT: call void @use_i32(i32 [[XP_VAL]]) +; CHECK-NEXT: ret void +; + %x = load i32, i32* %xp + call void @use_i32(i32 %x) + ret void +} + +declare void @use_i32(i32) + +!0 = !{!"branch_weights", i32 30} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/reserve-tbaa.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/reserve-tbaa.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/reserve-tbaa.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s + +; PR17906 +; When we promote two arguments in a single function with different types, +; before the fix, we used the same tag for the newly-created two loads. +; This testing case makes sure that we correctly transfer the tbaa tags from the +; original loads to the newly-created loads when promoting pointer arguments.
+ +@a = global i32* null, align 8 +@e = global i32** @a, align 8 +@g = global i32 0, align 4 +@c = global i64 0, align 8 +@d = global i8 0, align 1 + +define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) { +; CHECK-LABEL: define {{[^@]+}}@fn +; CHECK-SAME: (i32 [[P1_VAL:%.*]], i64 [[P2_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[P2_VAL]] to i32 +; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[P1_VAL]] to i8 +; CHECK-NEXT: store i8 [[CONV1]], i8* @d, align 1, !tbaa !0 +; CHECK-NEXT: ret void +; +entry: + %0 = load i64, i64* %p2, align 8, !tbaa !1 + %conv = trunc i64 %0 to i32 + %1 = load i32, i32* %p1, align 4, !tbaa !5 + %conv1 = trunc i32 %1 to i8 + store i8 %conv1, i8* @d, align 1, !tbaa !7 + ret void +} + +define i32 @main() { +; CHECK-LABEL: define {{[^@]+}}@main() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32**, i32*** @e, align 8, !tbaa !3 +; CHECK-NEXT: store i32* @g, i32** [[TMP0]], align 8, !tbaa !3 +; CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** @a, align 8, !tbaa !3 +; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 4, !tbaa !5 +; CHECK-NEXT: [[G_VAL:%.*]] = load i32, i32* @g, align 4, !tbaa !5 +; CHECK-NEXT: [[C_VAL:%.*]] = load i64, i64* @c, align 8, !tbaa !7 +; CHECK-NEXT: call fastcc void @fn(i32 [[G_VAL]], i64 [[C_VAL]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %0 = load i32**, i32*** @e, align 8, !tbaa !8 + store i32* @g, i32** %0, align 8, !tbaa !8 + %1 = load i32*, i32** @a, align 8, !tbaa !8 + store i32 1, i32* %1, align 4, !tbaa !5 + call fastcc void @fn(i32* @g, i64* @c) + + ret i32 0 +} + +!1 = !{!2, !2, i64 0} +!2 = !{!"long", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} +!5 = !{!6, !6, i64 0} +!6 = !{!"int", !3, i64 0} +!7 = !{!3, !3, i64 0} +!8 = !{!9, !9, i64 0} +!9 = !{!"any pointer", !3, i64 0} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/sret.ll 
b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/sret.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/sret.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +define internal void @add({i32, i32}* %this, i32* sret %r) { +; CHECK-LABEL: define {{[^@]+}}@add +; CHECK-SAME: (i32 [[THIS_0_0_VAL:%.*]], i32 [[THIS_0_1_VAL:%.*]], i32* noalias [[R:%.*]]) +; CHECK-NEXT: [[AB:%.*]] = add i32 [[THIS_0_0_VAL]], [[THIS_0_1_VAL]] +; CHECK-NEXT: store i32 [[AB]], i32* [[R]] +; CHECK-NEXT: ret void +; + %ap = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 0 + %bp = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 1 + %a = load i32, i32* %ap + %b = load i32, i32* %bp + %ab = add i32 %a, %b + store i32 %ab, i32* %r + ret void +} + +define void @f() { +; CHECK-LABEL: define {{[^@]+}}@f() +; CHECK-NEXT: [[R:%.*]] = alloca i32 +; CHECK-NEXT: [[PAIR:%.*]] = alloca { i32, i32 } +; CHECK-NEXT: [[PAIR_IDX:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[PAIR]], i64 0, i32 0 +; CHECK-NEXT: [[PAIR_IDX_VAL:%.*]] = load i32, i32* [[PAIR_IDX]] +; CHECK-NEXT: [[PAIR_IDX1:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[PAIR]], i64 0, i32 1 +; CHECK-NEXT: [[PAIR_IDX1_VAL:%.*]] = load i32, i32* [[PAIR_IDX1]] +; CHECK-NEXT: call void @add(i32 [[PAIR_IDX_VAL]], i32 [[PAIR_IDX1_VAL]], i32* noalias [[R]]) +; CHECK-NEXT: ret void +; + %r = alloca i32 + %pair = alloca {i32, i32} + + call void @add({i32, i32}* %pair, i32* sret %r) + ret void +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/tail.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/tail.ll new 
file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/tail.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt %s -argpromotion -maxar=0 -S -o - | FileCheck %s +; RUN: opt %s -passes=argpromotion -maxar=0 -S -o - | FileCheck %s +; PR14710 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%pair = type { i32, i32 } + +declare i8* @foo(%pair*) + +define internal void @bar(%pair* byval %Data) { +; CHECK-LABEL: define {{[^@]+}}@bar +; CHECK-SAME: (i32 [[DATA_0:%.*]], i32 [[DATA_1:%.*]]) +; CHECK-NEXT: [[DATA:%.*]] = alloca [[PAIR:%.*]], align 8 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[DATA_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1 +; CHECK-NEXT: store i32 [[DATA_1]], i32* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @foo(%pair* [[DATA]]) +; CHECK-NEXT: ret void +; + tail call i8* @foo(%pair* %Data) + ret void +} + +define void @zed(%pair* byval %Data) { +; CHECK-LABEL: define {{[^@]+}}@zed +; CHECK-SAME: (%pair* byval [[DATA:%.*]]) +; CHECK-NEXT: [[DATA_0:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 0 +; CHECK-NEXT: [[DATA_0_VAL:%.*]] = load i32, i32* [[DATA_0]], align 4 +; CHECK-NEXT: [[DATA_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1 +; CHECK-NEXT: [[DATA_1_VAL:%.*]] = load i32, i32* [[DATA_1]], align 4 +; CHECK-NEXT: call void @bar(i32 [[DATA_0_VAL]], i32 [[DATA_1_VAL]]) +; CHECK-NEXT: ret void +; + call void @bar(%pair* byval %Data) + ret void +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/thiscall.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/thiscall.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/thiscall.ll @@ 
-0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; In PR41658, argpromotion put an inalloca in a position that per the +; calling convention is passed in a register. This test verifies that +; we don't do that anymore. It also verifies that the combination of +; globalopt and argpromotion is able to optimize the call safely. +; +; RUN: opt -S -argpromotion -maxar=0 %s | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt -S -globalopt -argpromotion -maxar=0 %s | FileCheck %s --check-prefix=GLOBALOPT_ARGPROMOTION + +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-pc-windows-msvc19.11.0" + +%struct.a = type { i8 } + +define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@internalfun +; ARGPROMOTION-SAME: (%struct.a* [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca [[TMP0:%.*]]) +; ARGPROMOTION-NEXT: entry: +; ARGPROMOTION-NEXT: [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0 +; ARGPROMOTION-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4 +; ARGPROMOTION-NEXT: [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0 +; ARGPROMOTION-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* [[TMP1]], %struct.a* dereferenceable(1) [[A]]) +; ARGPROMOTION-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]]) +; ARGPROMOTION-NEXT: ret void +; +; GLOBALOPT_ARGPROMOTION-LABEL: define {{[^@]+}}@internalfun +; GLOBALOPT_ARGPROMOTION-SAME: (<{ [[STRUCT_A:%.*]] }>* [[TMP0:%.*]]) unnamed_addr +; GLOBALOPT_ARGPROMOTION-NEXT: entry: +; GLOBALOPT_ARGPROMOTION-NEXT: [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0 +; GLOBALOPT_ARGPROMOTION-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ 
[[STRUCT_A]] }>, align 4 +; GLOBALOPT_ARGPROMOTION-NEXT: [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0 +; GLOBALOPT_ARGPROMOTION-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* [[TMP1]], %struct.a* dereferenceable(1) [[A]]) +; GLOBALOPT_ARGPROMOTION-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]]) +; GLOBALOPT_ARGPROMOTION-NEXT: ret void +; +entry: + %a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0 + %argmem = alloca inalloca <{ %struct.a }>, align 4 + %1 = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %argmem, i32 0, i32 0 + %call = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* %1, %struct.a* dereferenceable(1) %a) + call void @ext(<{ %struct.a }>* inalloca %argmem) + ret void +} + +; This is here to ensure @internalfun is live. +define void @exportedfun(%struct.a* %a) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@exportedfun +; ARGPROMOTION-SAME: (%struct.a* [[A:%.*]]) +; ARGPROMOTION-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() +; ARGPROMOTION-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4 +; ARGPROMOTION-NEXT: call x86_thiscallcc void @internalfun(%struct.a* [[A]], <{ [[STRUCT_A]] }>* inalloca [[ARGMEM]]) +; ARGPROMOTION-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) +; ARGPROMOTION-NEXT: ret void +; +; GLOBALOPT_ARGPROMOTION-LABEL: define {{[^@]+}}@exportedfun +; GLOBALOPT_ARGPROMOTION-SAME: (%struct.a* [[A:%.*]]) local_unnamed_addr +; GLOBALOPT_ARGPROMOTION-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() +; GLOBALOPT_ARGPROMOTION-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4 +; GLOBALOPT_ARGPROMOTION-NEXT: call fastcc void @internalfun(<{ [[STRUCT_A]] }>* [[ARGMEM]]) +; GLOBALOPT_ARGPROMOTION-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) +; GLOBALOPT_ARGPROMOTION-NEXT: ret void +; + %inalloca.save = tail call i8* 
@llvm.stacksave() + %argmem = alloca inalloca <{ %struct.a }>, align 4 + call x86_thiscallcc void @internalfun(%struct.a* %a, <{ %struct.a }>* inalloca %argmem) + call void @llvm.stackrestore(i8* %inalloca.save) + ret void +} + +declare x86_thiscallcc %struct.a* @copy_ctor(%struct.a* returned, %struct.a* dereferenceable(1)) +declare void @ext(<{ %struct.a }>* inalloca) +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/variadic.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/variadic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-0/variadic.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=0 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=0 -S | FileCheck %s + +; Unused arguments from variadic functions cannot be eliminated as that changes +; their classification according to the SysV amd64 ABI. Clang and other frontends +; bake in the classification when they use things like byval, as in this test. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.tt0 = type { i64, i64 } +%struct.__va_list_tag = type { i32, i32, i8*, i8* } + +@t45 = internal global %struct.tt0 { i64 1335139741, i64 438042995 }, align 8 + +; Function Attrs: nounwind uwtable +define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 { +; CHECK-LABEL: define {{[^@]+}}@main +; CHECK-SAME: (i32 [[ARGC:%.*]], i8** nocapture readnone [[ARGV:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45) +; CHECK-NEXT: ret i32 0 +; +entry: + tail call void (i8*, i8*, i8*, i8*, i8*, ...)
@callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45) + ret i32 0 +} + +; Function Attrs: nounwind uwtable +define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...) { +; CHECK-LABEL: define {{[^@]+}}@callee_t0f +; CHECK-SAME: (i8* nocapture readnone [[TP13:%.*]], i8* nocapture readnone [[TP14:%.*]], i8* nocapture readnone [[TP15:%.*]], i8* nocapture readnone [[TP16:%.*]], i8* nocapture readnone [[TP17:%.*]], ...) +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + ret void +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-02-01-ReturnAttrs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-02-01-ReturnAttrs.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s + +define internal i32 @deref(i32* %x) nounwind { +; CHECK-LABEL: define {{[^@]+}}@deref +; CHECK-SAME: (i32 [[X_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[X_VAL]] +; +entry: + %tmp2 = load i32, i32* %x, align 4 + ret i32 %tmp2 +} + +define i32 @f(i32 %x) { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[X:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 +; CHECK-NEXT: [[X_ADDR_VAL:%.*]] = load i32, i32* [[X_ADDR]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]]) +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %x_addr = alloca i32 + store i32 %x, i32* %x_addr, align 4 + %tmp1 = call i32 @deref( i32* %x_addr ) nounwind + ret i32 %tmp1 +} diff --git 
a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-07-02-array-indexing.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-07-02-array-indexing.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-07-02-array-indexing.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; PR2498 + +; This test tries to convince argpromotion about promoting the load from %A + 2, +; because there is a load of %A in the entry block +define internal i32 @callee(i1 %C, i32* %A) { +; CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (i1 [[C:%.*]], i32* [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_0:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i32 [[A_0]] +; CHECK: F: +; CHECK-NEXT: [[A_2:%.*]] = getelementptr i32, i32* [[A]], i32 2 +; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[A_2]] +; CHECK-NEXT: ret i32 [[R]] +; +entry: + ; Unconditionally load the element at %A + %A.0 = load i32, i32* %A + br i1 %C, label %T, label %F + +T: + ret i32 %A.0 + +F: + ; Load the element at offset two from %A. This should not be promoted!
+ %A.2 = getelementptr i32, i32* %A, i32 2 + %R = load i32, i32* %A.2 + ret i32 %R +} + +define i32 @foo() { +; CHECK-LABEL: define {{[^@]+}}@foo() +; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32* null) +; CHECK-NEXT: ret i32 [[X]] +; + %X = call i32 @callee(i1 false, i32* null) ; [#uses=1] + ret i32 %X +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-09-07-CGUpdate.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-09-07-CGUpdate.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-09-07-CGUpdate.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -inline -argpromotion -maxar=3 -disable-output + +define internal fastcc i32 @hash(i32* %ts, i32 %mod) nounwind { +entry: + unreachable +} + +define void @encode(i32* %m, i32* %ts, i32* %new) nounwind { +entry: + %0 = call fastcc i32 @hash( i32* %ts, i32 0 ) nounwind ; [#uses=0] + unreachable +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-09-08-CGUpdateSelfEdge.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-09-08-CGUpdateSelfEdge.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/2008-09-08-CGUpdateSelfEdge.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -disable-output + +define internal fastcc i32 @term_SharingList(i32* %Term, i32* %List) nounwind { +entry: + br i1 false, label %bb, label %bb5 + +bb: ; preds = %entry + %0 = call fastcc i32 @term_SharingList( i32* null, i32* %List ) nounwind ; [#uses=0] + unreachable + +bb5: ; preds = %entry + ret i32 0 +} + +define i32 @term_Sharing(i32* %Term) nounwind { +entry: + br i1 false, label %bb.i, label %bb14 
+ +bb.i: ; preds = %entry + %0 = call fastcc i32 @term_SharingList( i32* null, i32* null ) nounwind ; [#uses=0] + ret i32 1 + +bb14: ; preds = %entry + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/aggregate-promote.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/aggregate-promote.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/aggregate-promote.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s + +%T = type { i32, i32, i32, i32 } +@G = constant %T { i32 0, i32 0, i32 17, i32 25 } + +define internal i32 @test(%T* %p) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32 [[P_0_2_VAL:%.*]], i32 [[P_0_3_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = add i32 [[P_0_3_VAL]], [[P_0_2_VAL]] +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 + %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 + %a = load i32, i32* %a.gep + %b = load i32, i32* %b.gep + %v = add i32 %a, %b + ret i32 %v +} + +define i32 @caller() { +; CHECK-LABEL: define {{[^@]+}}@caller() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G_IDX:%.*]] = getelementptr [[T:%.*]], %T* @G, i64 0, i32 2 +; CHECK-NEXT: [[G_IDX_VAL:%.*]] = load i32, i32* [[G_IDX]] +; CHECK-NEXT: [[G_IDX1:%.*]] = getelementptr [[T]], %T* @G, i64 0, i32 3 +; CHECK-NEXT: [[G_IDX1_VAL:%.*]] = load i32, i32* [[G_IDX1]] +; CHECK-NEXT: [[V:%.*]] = call i32 @test(i32 [[G_IDX_VAL]], i32 [[G_IDX1_VAL]]) +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %v = call i32 @test(%T* @G) + ret i32 %v +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/attributes.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/attributes.ll new file mode 100644 --- 
/dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/attributes.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S -argpromotion -maxar=3 < %s | FileCheck %s +; RUN: opt -S -passes=argpromotion -maxar=3 < %s | FileCheck %s +; Test that we only promote arguments when the caller/callee have compatible +; function attributes. + +target triple = "x86_64-unknown-linux-gnu" + +define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { +; CHECK-LABEL: define {{[^@]+}}@no_promote_avx2 +; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64>* readonly [[ARG1:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]] +; CHECK-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <4 x i64>, <4 x i64>* %arg1 + store <4 x i64> %tmp, <4 x i64>* %arg + ret void +} + +define void @no_promote(<4 x i64>* %arg) #1 { +; CHECK-LABEL: define {{[^@]+}}@no_promote +; CHECK-SAME: (<4 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* [[TMP2]], <4 x i64>* [[TMP]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <4 x i64>, align 32 + %tmp2 = alloca <4 x i64>, align 32 + %tmp3 = bitcast <4 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @no_promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp) + %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
+ store <4 x i64> %tmp4, <4 x i64>* %arg, align 2 + ret void +} + +define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { +; CHECK-LABEL: define {{[^@]+}}@promote_avx2 +; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <4 x i64> [[ARG1_VAL]], <4 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <4 x i64>, <4 x i64>* %arg1 + store <4 x i64> %tmp, <4 x i64>* %arg + ret void +} + +define void @promote(<4 x i64>* %arg) #0 { +; CHECK-LABEL: define {{[^@]+}}@promote +; CHECK-SAME: (<4 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @promote_avx2(<4 x i64>* [[TMP2]], <4 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <4 x i64>, align 32 + %tmp2 = alloca <4 x i64>, align 32 + %tmp3 = bitcast <4 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp) + %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32 + store <4 x i64> %tmp4, <4 x i64>* %arg, align 2 + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #2 + +attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" } +attributes #1 = { nounwind uwtable } +attributes #2 = { argmemonly nounwind } diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/attrs.ll 
b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/attrs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/attrs.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s + +%struct.ss = type { i32, i64 } + +; Don't drop 'byval' on %X here. +define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]], i32 [[I:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; CHECK-NEXT: store i32 0, i32* [[X]], align 4 +; CHECK-NEXT: ret void +; +entry: + + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = add i32 %tmp1, 1 + store i32 %tmp2, i32* %tmp, align 4 + + store i32 0, i32* %X + ret void +} + +; Also make sure we don't drop the call zeroext attribute. 
+define i32 @test(i32* %X) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32* [[X:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 +; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 +; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X]], i32 zeroext 0) +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca %struct.ss + %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + store i32 1, i32* %tmp1, align 8 + %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i64 2, i64* %tmp4, align 4 + + call void @f( %struct.ss* byval %S, i32* byval %X, i32 zeroext 0) + + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/basictest.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/basictest.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/basictest.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -basicaa -argpromotion -maxar=3 -mem2reg -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +define internal i32 @test(i32* %X, i32* %Y) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]]) +; CHECK-NEXT: 
[[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]] +; CHECK-NEXT: ret i32 [[C]] +; + %A = load i32, i32* %X + %B = load i32, i32* %Y + %C = add i32 %A, %B + ret i32 %C +} + +define internal i32 @caller(i32* %B) { +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (i32 [[B_VAL1:%.*]]) +; CHECK-NEXT: [[C:%.*]] = call i32 @test(i32 1, i32 [[B_VAL1]]) +; CHECK-NEXT: ret i32 [[C]] +; + %A = alloca i32 + store i32 1, i32* %A + %C = call i32 @test(i32* %A, i32* %B) + ret i32 %C +} + +define i32 @callercaller() { +; CHECK-LABEL: define {{[^@]+}}@callercaller() +; CHECK-NEXT: [[X:%.*]] = call i32 @caller(i32 2) +; CHECK-NEXT: ret i32 [[X]] +; + %B = alloca i32 + store i32 2, i32* %B + %X = call i32 @caller(i32* %B) + ret i32 %X +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/byval-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/byval-2.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s + +; Arg promotion eliminates the struct argument. +; FIXME: We should eliminate the i32* argument. 
+ +%struct.ss = type { i32, i64 } + +define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; CHECK-NEXT: store i32 0, i32* [[X]], align 4 +; CHECK-NEXT: ret void +; +entry: + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = add i32 %tmp1, 1 + store i32 %tmp2, i32* %tmp, align 4 + + store i32 0, i32* %X + ret void +} + +define i32 @test(i32* %X) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32* [[X:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 +; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 +; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval 
[[X]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca %struct.ss + %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + store i32 1, i32* %tmp1, align 8 + %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i64 2, i64* %tmp4, align 4 + call void @f( %struct.ss* byval %S, i32* byval %X) + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/byval.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/byval.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/byval.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s + +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +%struct.ss = type { i32, i64 } + +define internal void @f(%struct.ss* byval %b) nounwind { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; CHECK-NEXT: ret void +; +entry: + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = add i32 %tmp1, 1 + store i32 %tmp2, 
i32* %tmp, align 4 + ret void +} + + +define internal void @g(%struct.ss* byval align 32 %b) nounwind { +; CHECK-LABEL: define {{[^@]+}}@g +; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; CHECK-NEXT: ret void +; +entry: + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = add i32 %tmp1, 1 + store i32 %tmp2, i32* %tmp, align 4 + ret void +} + + +define i32 @main() nounwind { +; CHECK-LABEL: define {{[^@]+}}@main() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 +; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 +; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]]) +; CHECK-NEXT: [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_01_VAL:%.*]] = load i32, i32* 
[[S_01]], align 4 +; CHECK-NEXT: [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_12_VAL:%.*]] = load i64, i64* [[S_12]], align 4 +; CHECK-NEXT: call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca %struct.ss + %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + store i32 1, i32* %tmp1, align 8 + %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i64 2, i64* %tmp4, align 4 + call void @f(%struct.ss* byval %S) nounwind + call void @g(%struct.ss* byval %S) nounwind + ret i32 0 +} + + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/chained.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/chained.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/chained.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s + +@G1 = constant i32 0 +@G2 = constant i32* @G1 + +define internal i32 @test(i32** %x) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[X_VAL_VAL]] +; +entry: + %y = load i32*, i32** %x + %z = load i32, i32* %y + ret i32 %z +} + +define i32 @caller() { +; CHECK-LABEL: define {{[^@]+}}@caller() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G2_VAL:%.*]] = load i32*, i32** @G2 +; CHECK-NEXT: [[G2_VAL_VAL:%.*]] = load i32, i32* [[G2_VAL]] +; CHECK-NEXT: [[X:%.*]] = call i32 @test(i32 [[G2_VAL_VAL]]) +; CHECK-NEXT: ret i32 [[X]] +; +entry: + %x = call i32 @test(i32** @G2) + ret i32 %x +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/control-flow.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/control-flow.ll new file mode 100644 
--- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/control-flow.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s + +; Don't promote around control flow. +define internal i32 @callee(i1 %C, i32* %P) { +; CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (i1 [[C:%.*]], i32* [[P:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i32 17 +; CHECK: F: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: ret i32 [[X]] +; +entry: + br i1 %C, label %T, label %F + +T: + ret i32 17 + +F: + %X = load i32, i32* %P + ret i32 %X +} + +define i32 @foo() { +; CHECK-LABEL: define {{[^@]+}}@foo() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 true, i32* null) +; CHECK-NEXT: ret i32 [[X]] +; +entry: + %X = call i32 @callee(i1 true, i32* null) + ret i32 %X +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/control-flow2.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/control-flow2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/control-flow2.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s + +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +define internal i32 @callee(i1 %C, i32* %P) { +; CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (i1 [[C:%.*]], i32 [[P_VAL:%.*]]) +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: 
+; CHECK-NEXT: ret i32 17 +; CHECK: F: +; CHECK-NEXT: ret i32 [[P_VAL]] +; + br i1 %C, label %T, label %F + +T: ; preds = %0 + ret i32 17 + +F: ; preds = %0 + %X = load i32, i32* %P ; [#uses=1] + ret i32 %X +} + +define i32 @foo() { +; CHECK-LABEL: define {{[^@]+}}@foo() +; CHECK-NEXT: [[A:%.*]] = alloca i32 +; CHECK-NEXT: store i32 17, i32* [[A]] +; CHECK-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32 [[A_VAL]]) +; CHECK-NEXT: ret i32 [[X]] +; + %A = alloca i32 ; [#uses=2] + store i32 17, i32* %A + %X = call i32 @callee( i1 false, i32* %A ) ; [#uses=1] + ret i32 %X +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/crash.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/crash.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/crash.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S < %s -inline -argpromotion -maxar=3 | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_OLDPM +; RUN: opt -S < %s -passes=inline,argpromotion -maxar=3 | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_NEWPM + +%S = type { %S* } + +; Inlining should nuke the invoke (and any inlined calls) here even with +; argument promotion running along with it. 
+define void @zot() personality i32 (...)* @wibble { +; ARGPROMOTION-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: unreachable +; ARGPROMOTION: hoge.exit: +; ARGPROMOTION-NEXT: br label [[BB1:%.*]] +; ARGPROMOTION: bb1: +; ARGPROMOTION-NEXT: unreachable +; ARGPROMOTION: bb2: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } +; ARGPROMOTION-NEXT: cleanup +; ARGPROMOTION-NEXT: unreachable +; +bb: + invoke void @hoge() + to label %bb1 unwind label %bb2 + +bb1: + unreachable + +bb2: + %tmp = landingpad { i8*, i32 } + cleanup + unreachable +} + +define internal void @hoge() { +bb: + %tmp = call fastcc i8* @spam(i1 (i8*)* @eggs) + %tmp1 = call fastcc i8* @spam(i1 (i8*)* @barney) + unreachable +} + +define internal fastcc i8* @spam(i1 (i8*)* %arg) { +bb: + unreachable +} + +define internal i1 @eggs(i8* %arg) { +; ALL_NEWPM-LABEL: define {{[^@]+}}@eggs() +; ALL_NEWPM-NEXT: bb: +; ALL_NEWPM-NEXT: unreachable +; +bb: + %tmp = call zeroext i1 @barney(i8* %arg) + unreachable +} + +define internal i1 @barney(i8* %arg) { +bb: + ret i1 undef +} + +define i32 @test_inf_promote_caller(i32 %arg) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_caller +; ARGPROMOTION-SAME: (i32 [[ARG:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] +; ARGPROMOTION-NEXT: [[TMP1:%.*]] = alloca [[S]] +; ARGPROMOTION-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) +; ARGPROMOTION-NEXT: ret i32 0 +; +bb: + %tmp = alloca %S + %tmp1 = alloca %S + %tmp2 = call i32 @test_inf_promote_callee(%S* %tmp, %S* %tmp1) + + ret i32 0 +} + +define internal i32 @test_inf_promote_callee(%S* %arg, %S* %arg1) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_callee +; ARGPROMOTION-SAME: (%S* [[ARG:%.*]], %S* [[ARG1:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0 +; ARGPROMOTION-NEXT: 
[[TMP2:%.*]] = load %S*, %S** [[TMP]] +; ARGPROMOTION-NEXT: [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0 +; ARGPROMOTION-NEXT: [[TMP4:%.*]] = load %S*, %S** [[TMP3]] +; ARGPROMOTION-NEXT: [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]]) +; ARGPROMOTION-NEXT: ret i32 0 +; +bb: + %tmp = getelementptr %S, %S* %arg1, i32 0, i32 0 + %tmp2 = load %S*, %S** %tmp + %tmp3 = getelementptr %S, %S* %arg, i32 0, i32 0 + %tmp4 = load %S*, %S** %tmp3 + %tmp5 = call i32 @test_inf_promote_callee(%S* %tmp4, %S* %tmp2) + + ret i32 0 +} + +declare i32 @wibble(...) diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/dbg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/dbg.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s + +declare void @sink(i32) + +define internal void @test(i32** %X) !dbg !2 { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]]) !dbg !3 +; CHECK-NEXT: call void @sink(i32 [[X_VAL_VAL]]) +; CHECK-NEXT: ret void +; + %1 = load i32*, i32** %X, align 8 + %2 = load i32, i32* %1, align 8 + call void @sink(i32 %2) + ret void +} + +%struct.pair = type { i32, i32 } + +define internal void @test_byval(%struct.pair* byval %P) { +; CHECK-LABEL: define {{[^@]+}}@test_byval +; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) +; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 8 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[P_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1 +; CHECK-NEXT: store i32 
[[P_1]], i32* [[DOT1]], align 4 +; CHECK-NEXT: ret void +; + ret void +} + +define void @caller(i32** %Y, %struct.pair* %P) { +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (i32** [[Y:%.*]], %struct.pair* [[P:%.*]]) +; CHECK-NEXT: [[Y_VAL:%.*]] = load i32*, i32** [[Y]], align 8, !dbg !4 +; CHECK-NEXT: [[Y_VAL_VAL:%.*]] = load i32, i32* [[Y_VAL]], align 8, !dbg !4 +; CHECK-NEXT: call void @test(i32 [[Y_VAL_VAL]]), !dbg !4 +; CHECK-NEXT: [[P_0:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 0, !dbg !5 +; CHECK-NEXT: [[P_0_VAL:%.*]] = load i32, i32* [[P_0]], align 4, !dbg !5 +; CHECK-NEXT: [[P_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1, !dbg !5 +; CHECK-NEXT: [[P_1_VAL:%.*]] = load i32, i32* [[P_1]], align 4, !dbg !5 +; CHECK-NEXT: call void @test_byval(i32 [[P_0_VAL]], i32 [[P_1_VAL]]), !dbg !5 +; CHECK-NEXT: ret void +; + call void @test(i32** %Y), !dbg !1 + + call void @test_byval(%struct.pair* %P), !dbg !6 + ret void +} + + +!llvm.module.flags = !{!0} +!llvm.dbg.cu = !{!3} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !DILocation(line: 8, scope: !2) +!2 = distinct !DISubprogram(name: "test", file: !5, line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, scopeLine: 3, scope: null) +!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: LineTablesOnly, file: !5) +!5 = !DIFile(filename: "test.c", directory: "") +!6 = !DILocation(line: 9, scope: !2) diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/fp80.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/fp80.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s 
-argpromotion -maxar=3 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%union.u = type { x86_fp80 } +%struct.s = type { double, i16, i8, [5 x i8] } + +@b = internal global %struct.s { double 3.14, i16 9439, i8 25, [5 x i8] undef }, align 16 + +%struct.Foo = type { i32, i64 } +@a = internal global %struct.Foo { i32 1, i64 2 }, align 8 + +define void @run() { +; CHECK-LABEL: define {{[^@]+}}@run() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0 +; CHECK-NEXT: [[DOT0_VAL:%.*]] = load x86_fp80, x86_fp80* [[DOT0]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* @a) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* @a) +; CHECK-NEXT: ret void +; +entry: + tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) + tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) + call i64 @AccessPaddingOfStruct(%struct.Foo* @a) + call i64 @CaptureAStruct(%struct.Foo* @a) + ret void +} + +define internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) { +; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely +; CHECK-SAME: (%union.u* byval align 16 [[ARG:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast %union.u* [[ARG]] to %struct.s* +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.s* [[BITCAST]], i64 0, i32 2 +; CHECK-NEXT: [[RESULT:%.*]] = load i8, i8* [[GEP]] +; CHECK-NEXT: ret i8 [[RESULT]] +; +entry: + %bitcast = bitcast %union.u* 
%arg to %struct.s* + %gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2 + %result = load i8, i8* %gep + ret i8 %result +} + +define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %arg) { +; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafely +; CHECK-SAME: (x86_fp80 [[ARG_0:%.*]]) +; CHECK-NEXT: [[ARG:%.*]] = alloca [[UNION_U:%.*]], align 16 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U]], %union.u* [[ARG]], i32 0, i32 0 +; CHECK-NEXT: store x86_fp80 [[ARG_0]], x86_fp80* [[DOT0]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], %union.u* [[ARG]], i64 0, i32 0 +; CHECK-NEXT: [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]] +; CHECK-NEXT: ret x86_fp80 [[FP80]] +; + %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0 + %fp80 = load x86_fp80, x86_fp80* %gep + ret x86_fp80 %fp80 +} + +define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) { +; CHECK-LABEL: define {{[^@]+}}@AccessPaddingOfStruct +; CHECK-SAME: (%struct.Foo* byval [[A:%.*]]) +; CHECK-NEXT: [[P:%.*]] = bitcast %struct.Foo* [[A]] to i64* +; CHECK-NEXT: [[V:%.*]] = load i64, i64* [[P]] +; CHECK-NEXT: ret i64 [[V]] +; + %p = bitcast %struct.Foo* %a to i64* + %v = load i64, i64* %p + ret i64 %v +} + +define internal i64 @CaptureAStruct(%struct.Foo* byval %a) { +; CHECK-LABEL: define {{[^@]+}}@CaptureAStruct +; CHECK-SAME: (%struct.Foo* byval [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_PTR:%.*]] = alloca %struct.Foo* +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi %struct.Foo* [ [[A]], [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ] +; CHECK-NEXT: store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]] +; CHECK-NEXT: [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0 +; CHECK-NEXT: br label [[LOOP]] +; +entry: + %a_ptr = alloca %struct.Foo* + br label %loop + +loop: + %phi = phi 
%struct.Foo* [ null, %entry ], [ %gep, %loop ] + %0 = phi %struct.Foo* [ %a, %entry ], [ %0, %loop ] + store %struct.Foo* %phi, %struct.Foo** %a_ptr + %gep = getelementptr %struct.Foo, %struct.Foo* %a, i64 0 + br label %loop +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/inalloca.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/inalloca.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/inalloca.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt %s -globalopt -argpromotion -maxar=3 -sroa -S | FileCheck %s +; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -maxar=3 -S | FileCheck %s + +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +%struct.ss = type { i32, i32 } + +; Argpromote + sroa should change this to passing the two integers by value. 
+define internal i32 @f(%struct.ss* inalloca %s) { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[S_0_0_VAL:%.*]], i32 [[S_0_1_VAL:%.*]]) unnamed_addr +; CHECK-NEXT: entry: +; CHECK-NEXT: [[R:%.*]] = add i32 [[S_0_0_VAL]], [[S_0_1_VAL]] +; CHECK-NEXT: ret i32 [[R]] +; +entry: + %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0 + %f1 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 1 + %a = load i32, i32* %f0, align 4 + %b = load i32, i32* %f1, align 4 + %r = add i32 %a, %b + ret i32 %r +} + +define i32 @main() { +; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr +; CHECK-NEXT: entry: +; CHECK-NEXT: [[R:%.*]] = call fastcc i32 @f(i32 1, i32 2) +; CHECK-NEXT: ret i32 [[R]] +; +entry: + %S = alloca inalloca %struct.ss + %f0 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + %f1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i32 1, i32* %f0, align 4 + store i32 2, i32* %f1, align 4 + %r = call i32 @f(%struct.ss* inalloca %S) + ret i32 %r +} + +; Argpromote can't promote %a because of the icmp use. 
+define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind { +; CHECK-LABEL: define {{[^@]+}}@g +; CHECK-SAME: (%struct.ss* [[A:%.*]], %struct.ss* [[B:%.*]]) unnamed_addr +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = icmp eq %struct.ss* [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; +entry: + %c = icmp eq %struct.ss* %a, %b + ret i1 %c +} + +define i32 @test() { +; CHECK-LABEL: define {{[^@]+}}@test() local_unnamed_addr +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca inalloca [[STRUCT_SS:%.*]] +; CHECK-NEXT: [[C:%.*]] = call fastcc i1 @g(%struct.ss* [[S]], %struct.ss* [[S]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca inalloca %struct.ss + %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/invalidation.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/invalidation.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/invalidation.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; Check that when argument promotion changes a function in some parent node of +; the call graph, any analyses that happened to be cached for that function are +; actually invalidated. We are using `demanded-bits` here because when printed +; it will end up caching a value for every instruction, making it easy to +; detect the instruction-level changes that will fail here. With improper +; invalidation this will crash in the second printer as it tries to reuse +; now-invalid demanded bits. 
+; +; RUN: opt < %s -passes='function(print),cgscc(argpromotion,function(print))' -maxar=3 -S | FileCheck %s + +@G = constant i32 0 + +define internal i32 @a(i32* %x) { +; CHECK-LABEL: define {{[^@]+}}@a +; CHECK-SAME: (i32 [[X_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[X_VAL]] +; +entry: + %v = load i32, i32* %x + ret i32 %v +} + +define i32 @b() { +; CHECK-LABEL: define {{[^@]+}}@b() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G_VAL:%.*]] = load i32, i32* @G +; CHECK-NEXT: [[V:%.*]] = call i32 @a(i32 [[G_VAL]]) +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %v = call i32 @a(i32* @G) + ret i32 %v +} + +define i32 @c() { +; CHECK-LABEL: define {{[^@]+}}@c() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G_VAL:%.*]] = load i32, i32* @G +; CHECK-NEXT: [[V1:%.*]] = call i32 @a(i32 [[G_VAL]]) +; CHECK-NEXT: [[V2:%.*]] = call i32 @b() +; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[V1]], [[V2]] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %v1 = call i32 @a(i32* @G) + %v2 = call i32 @b() + %result = add i32 %v1, %v2 + ret i32 %result +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/lit.local.cfg b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/lit.local.cfg new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'X86' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/min-legal-vector-width.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/min-legal-vector-width.ll @@ -0,0 +1,387 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S -argpromotion -maxar=3 < %s | FileCheck %s +; RUN: opt -S -passes=argpromotion -maxar=3 < %s | FileCheck %s +; Test that we 
only promote arguments when the caller/callee have compatible +; function attributes. + +target triple = "x86_64-unknown-linux-gnu" + +; This should promote +define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret
void +} + +; This should promote +define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should promote +define internal fastcc void 
@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should promote +define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { +; 
CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should not promote +define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* 
[[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]] +; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should not promote +define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* 
[[ARG1]] +; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should promote +define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define 
void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 { +; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should promote +define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 { +; CHECK-LABEL: define 
{{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; If the arguments are scalar, it's ok to promote. 
+define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %X, i32* %Y) #2 { +; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]]) +; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]] +; CHECK-NEXT: ret i32 [[C]] +; + %A = load i32, i32* %X + %B = load i32, i32* %Y + %C = add i32 %A, %B + ret i32 %C +} + +define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %B) #2 { +; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (i32* [[B:%.*]]) +; CHECK-NEXT: [[A:%.*]] = alloca i32 +; CHECK-NEXT: store i32 1, i32* [[A]] +; CHECK-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[B_VAL:%.*]] = load i32, i32* [[B]] +; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]]) +; CHECK-NEXT: ret i32 [[C]] +; + %A = alloca i32 + store i32 1, i32* %A + %C = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %A, i32* %B) + ret i32 %C +} + +; If the arguments are scalar, it's ok to promote. 
+define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %X, i32* %Y) #2 { +; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]]) +; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]] +; CHECK-NEXT: ret i32 [[C]] +; + %A = load i32, i32* %X + %B = load i32, i32* %Y + %C = add i32 %A, %B + ret i32 %C +} + +define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %B) #2 { +; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CHECK-SAME: (i32* [[B:%.*]]) +; CHECK-NEXT: [[A:%.*]] = alloca i32 +; CHECK-NEXT: store i32 1, i32* [[A]] +; CHECK-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[B_VAL:%.*]] = load i32, i32* [[B]] +; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]]) +; CHECK-NEXT: ret i32 [[C]] +; + %A = alloca i32 + store i32 1, i32* %A + %C = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %A, i32* %B) + ret i32 %C +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5 + +attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" } +attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" } +attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" } +attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" } +attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" 
"min-legal-vector-width"="256" "prefer-vector-width"="256" } +attributes #5 = { argmemonly nounwind } diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/musttail.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/musttail.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/musttail.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; PR36543 + +; Don't promote arguments of musttail callee + +%T = type { i32, i32, i32, i32 } + +define internal i32 @test(%T* %p) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (%T* [[P:%.*]]) +; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3 +; CHECK-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2 +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]] +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]] +; CHECK-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: ret i32 [[V]] +; + %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 + %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 + %a = load i32, i32* %a.gep + %b = load i32, i32* %b.gep + %v = add i32 %a, %b + ret i32 %v +} + +define i32 @caller(%T* %p) { +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (%T* [[P:%.*]]) +; CHECK-NEXT: [[V:%.*]] = musttail call i32 @test(%T* [[P]]) +; CHECK-NEXT: ret i32 [[V]] +; + %v = musttail call i32 @test(%T* %p) + ret i32 %v +} + +; Don't promote arguments of musttail caller + +define i32 @foo(%T* %p, i32 %v) { +; CHECK-LABEL: define {{[^@]+}}@foo +; CHECK-SAME: (%T* [[P:%.*]], i32 [[V:%.*]]) +; CHECK-NEXT: ret i32 0 +; + ret i32 0 +} + +define internal i32 @test2(%T* %p, i32 %p2) { +; CHECK-LABEL: define {{[^@]+}}@test2 +; CHECK-SAME: (%T* [[P:%.*]], i32 [[P2:%.*]]) +; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3 +; 
CHECK-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2 +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]] +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]] +; CHECK-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: [[CA:%.*]] = musttail call i32 @foo(%T* undef, i32 [[V]]) +; CHECK-NEXT: ret i32 [[CA]] +; + %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 + %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 + %a = load i32, i32* %a.gep + %b = load i32, i32* %b.gep + %v = add i32 %a, %b + %ca = musttail call i32 @foo(%T* undef, i32 %v) + ret i32 %ca +} + +define i32 @caller2(%T* %g) { +; CHECK-LABEL: define {{[^@]+}}@caller2 +; CHECK-SAME: (%T* [[G:%.*]]) +; CHECK-NEXT: [[V:%.*]] = call i32 @test2(%T* [[G]], i32 0) +; CHECK-NEXT: ret i32 [[V]] +; + %v = call i32 @test2(%T* %g, i32 0) + ret i32 %v +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/naked_functions.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/naked_functions.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/naked_functions.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s + +; Don't promote parameters of/arguments to naked functions + +@g = common global i32 0, align 4 + +define i32 @bar() { +; CHECK-LABEL: define {{[^@]+}}@bar() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo(i32* @g) +; CHECK-NEXT: ret i32 [[CALL]] +; +entry: + %call = call i32 @foo(i32* @g) + ret i32 %call +} + +define internal i32 @foo(i32*) #0 { +; CHECK-LABEL: define {{[^@]+}}@foo +; CHECK-SAME: (i32* [[TMP0:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() +; CHECK-NEXT: unreachable +; +entry: + %retval = alloca i32, align 4 + call void asm 
sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() + unreachable +} + + +attributes #0 = { naked } diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/nonzero-address-spaces.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/nonzero-address-spaces.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/nonzero-address-spaces.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s + +; ArgumentPromotion should preserve the default function address space +; from the data layout. + +target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8" + +@g = common global i32 0, align 4 + +define i32 @bar() { +; CHECK-LABEL: define {{[^@]+}}@bar() addrspace(1) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call addrspace(1) i32 @foo() +; CHECK-NEXT: ret i32 [[CALL]] +; + +entry: + %call = call i32 @foo(i32* @g) + ret i32 %call +} + +define internal i32 @foo(i32*) { +; CHECK-LABEL: define {{[^@]+}}@foo() addrspace(1) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() +; CHECK-NEXT: unreachable +; +entry: + %retval = alloca i32, align 4 + call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() + unreachable +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr27568.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr27568.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr27568.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S -argpromotion -maxar=3 < %s | FileCheck %s +; RUN: opt -S -passes=argpromotion -maxar=3 < %s | 
FileCheck %s +; RUN: opt -S -debugify -maxar=3 -o /dev/null < %s +target triple = "x86_64-pc-windows-msvc" + +define internal void @callee(i8*) { +; CHECK-LABEL: define {{[^@]+}}@callee() +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @thunk() +; CHECK-NEXT: ret void +; +entry: + call void @thunk() + ret void +} + +define void @test1() personality i32 (...)* @__CxxFrameHandler3 { +; CHECK-LABEL: define {{[^@]+}}@test1() personality i32 (...)* @__CxxFrameHandler3 +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @thunk() +; CHECK-NEXT: to label [[OUT:%.*]] unwind label [[CPAD:%.*]] +; CHECK: out: +; CHECK-NEXT: ret void +; CHECK: cpad: +; CHECK-NEXT: [[PAD:%.*]] = cleanuppad within none [] +; CHECK-NEXT: call void @callee() [ "funclet"(token [[PAD]]) ] +; CHECK-NEXT: cleanupret from [[PAD]] unwind to caller +; +entry: + invoke void @thunk() + to label %out unwind label %cpad + +out: + ret void + +cpad: + %pad = cleanuppad within none [] + call void @callee(i8* null) [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +} + + +declare void @thunk() + +declare i32 @__CxxFrameHandler3(...) 
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr32917.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr32917.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr32917.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; PR 32917 + +@b = common local_unnamed_addr global i32 0, align 4 +@a = common local_unnamed_addr global i32 0, align 4 + +define i32 @fn2() local_unnamed_addr { +; CHECK-LABEL: define {{[^@]+}}@fn2() local_unnamed_addr +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32* +; CHECK-NEXT: [[DOTIDX:%.*]] = getelementptr i32, i32* [[TMP3]], i64 -1 +; CHECK-NEXT: [[DOTIDX_VAL:%.*]] = load i32, i32* [[DOTIDX]], align 4 +; CHECK-NEXT: call fastcc void @fn1(i32 [[DOTIDX_VAL]]) +; CHECK-NEXT: ret i32 undef +; + %1 = load i32, i32* @b, align 4 + %2 = sext i32 %1 to i64 + %3 = inttoptr i64 %2 to i32* + call fastcc void @fn1(i32* %3) + ret i32 undef +} + +define internal fastcc void @fn1(i32* nocapture readonly) unnamed_addr { +; CHECK-LABEL: define {{[^@]+}}@fn1 +; CHECK-SAME: (i32 [[DOT18446744073709551615_VAL:%.*]]) unnamed_addr +; CHECK-NEXT: store i32 [[DOT18446744073709551615_VAL]], i32* @a, align 4 +; CHECK-NEXT: ret void +; + %2 = getelementptr inbounds i32, i32* %0, i64 -1 + %3 = load i32, i32* %2, align 4 + store i32 %3, i32* @a, align 4 + ret void +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr33641_remove_arg_dbgvalue.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/pr33641_remove_arg_dbgvalue.ll @@ -0,0 +1,42 
@@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -argpromotion -maxar=3 -verify -dse -S %s -o - | FileCheck %s + +; Fix for PR33641. ArgumentPromotion removed the argument to bar but left the call to +; dbg.value which still used the removed argument. + +; The %p argument should be removed, and the use of it in dbg.value should be +; changed to undef. + +%p_t = type i16* +%fun_t = type void (%p_t)* + +define void @foo() { +; CHECK-LABEL: define {{[^@]+}}@foo() +; CHECK-NEXT: ret void +; + %tmp = alloca %fun_t + store %fun_t @bar, %fun_t* %tmp + ret void +} + +define internal void @bar(%p_t %p) { +; CHECK-LABEL: define {{[^@]+}}@bar() +; CHECK-NEXT: call void @llvm.dbg.value(metadata i16* undef, metadata !3, metadata !DIExpression()), !dbg !5 +; CHECK-NEXT: ret void +; + call void @llvm.dbg.value(metadata %p_t %p, metadata !4, metadata !5), !dbg !6 + ret void +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1) +!1 = !DIFile(filename: "test.c", directory: "") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "bar", unit: !0) +!4 = !DILocalVariable(name: "p", scope: !3) +!5 = !DIExpression() +!6 = !DILocation(line: 1, column: 1, scope: !3) diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/profile.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/profile.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/profile.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -argpromotion -maxar=3 -mem2reg -S < %s | FileCheck %s +target datalayout = 
"E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +; Checks if !prof metadata is correct in deadargelim. + +define void @caller() #0 { +; CHECK-LABEL: define {{[^@]+}}@caller() +; CHECK-NEXT: call void @promote_i32_ptr(i32 42), !prof !0 +; CHECK-NEXT: ret void +; + %x = alloca i32 + store i32 42, i32* %x + call void @promote_i32_ptr(i32* %x), !prof !0 + ret void +} + +define internal void @promote_i32_ptr(i32* %xp) { +; CHECK-LABEL: define {{[^@]+}}@promote_i32_ptr +; CHECK-SAME: (i32 [[XP_VAL:%.*]]) +; CHECK-NEXT: call void @use_i32(i32 [[XP_VAL]]) +; CHECK-NEXT: ret void +; + %x = load i32, i32* %xp + call void @use_i32(i32 %x) + ret void +} + +declare void @use_i32(i32) + +!0 = !{!"branch_weights", i32 30} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/reserve-tbaa.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/reserve-tbaa.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/reserve-tbaa.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s + +; PR17906 +; When we promote two arguments in a single function with different types, +; before the fix, we used the same tag for the newly-created two loads. +; This testing case makes sure that we correctly transfer the tbaa tags from the +; original loads to the newly-created loads when promoting pointer arguments. 
+ +@a = global i32* null, align 8 +@e = global i32** @a, align 8 +@g = global i32 0, align 4 +@c = global i64 0, align 8 +@d = global i8 0, align 1 + +define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) { +; CHECK-LABEL: define {{[^@]+}}@fn +; CHECK-SAME: (i32 [[P1_VAL:%.*]], i64 [[P2_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[P2_VAL]] to i32 +; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[P1_VAL]] to i8 +; CHECK-NEXT: store i8 [[CONV1]], i8* @d, align 1, !tbaa !0 +; CHECK-NEXT: ret void +; +entry: + %0 = load i64, i64* %p2, align 8, !tbaa !1 + %conv = trunc i64 %0 to i32 + %1 = load i32, i32* %p1, align 4, !tbaa !5 + %conv1 = trunc i32 %1 to i8 + store i8 %conv1, i8* @d, align 1, !tbaa !7 + ret void +} + +define i32 @main() { +; CHECK-LABEL: define {{[^@]+}}@main() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32**, i32*** @e, align 8, !tbaa !3 +; CHECK-NEXT: store i32* @g, i32** [[TMP0]], align 8, !tbaa !3 +; CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** @a, align 8, !tbaa !3 +; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 4, !tbaa !5 +; CHECK-NEXT: [[G_VAL:%.*]] = load i32, i32* @g, align 4, !tbaa !5 +; CHECK-NEXT: [[C_VAL:%.*]] = load i64, i64* @c, align 8, !tbaa !7 +; CHECK-NEXT: call fastcc void @fn(i32 [[G_VAL]], i64 [[C_VAL]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %0 = load i32**, i32*** @e, align 8, !tbaa !8 + store i32* @g, i32** %0, align 8, !tbaa !8 + %1 = load i32*, i32** @a, align 8, !tbaa !8 + store i32 1, i32* %1, align 4, !tbaa !5 + call fastcc void @fn(i32* @g, i64* @c) + + ret i32 0 +} + +!1 = !{!2, !2, i64 0} +!2 = !{!"long", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} +!5 = !{!6, !6, i64 0} +!6 = !{!"int", !3, i64 0} +!7 = !{!3, !3, i64 0} +!8 = !{!9, !9, i64 0} +!9 = !{!"any pointer", !3, i64 0} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/sret.ll 
b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/sret.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/sret.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +define internal void @add({i32, i32}* %this, i32* sret %r) { +; CHECK-LABEL: define {{[^@]+}}@add +; CHECK-SAME: (i32 [[THIS_0_0_VAL:%.*]], i32 [[THIS_0_1_VAL:%.*]], i32* noalias [[R:%.*]]) +; CHECK-NEXT: [[AB:%.*]] = add i32 [[THIS_0_0_VAL]], [[THIS_0_1_VAL]] +; CHECK-NEXT: store i32 [[AB]], i32* [[R]] +; CHECK-NEXT: ret void +; + %ap = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 0 + %bp = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 1 + %a = load i32, i32* %ap + %b = load i32, i32* %bp + %ab = add i32 %a, %b + store i32 %ab, i32* %r + ret void +} + +define void @f() { +; CHECK-LABEL: define {{[^@]+}}@f() +; CHECK-NEXT: [[R:%.*]] = alloca i32 +; CHECK-NEXT: [[PAIR:%.*]] = alloca { i32, i32 } +; CHECK-NEXT: [[PAIR_IDX:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[PAIR]], i64 0, i32 0 +; CHECK-NEXT: [[PAIR_IDX_VAL:%.*]] = load i32, i32* [[PAIR_IDX]] +; CHECK-NEXT: [[PAIR_IDX1:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[PAIR]], i64 0, i32 1 +; CHECK-NEXT: [[PAIR_IDX1_VAL:%.*]] = load i32, i32* [[PAIR_IDX1]] +; CHECK-NEXT: call void @add(i32 [[PAIR_IDX_VAL]], i32 [[PAIR_IDX1_VAL]], i32* noalias [[R]]) +; CHECK-NEXT: ret void +; + %r = alloca i32 + %pair = alloca {i32, i32} + + call void @add({i32, i32}* %pair, i32* sret %r) + ret void +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/tail.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/tail.ll new 
file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/tail.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt %s -argpromotion -maxar=3 -S -o - | FileCheck %s +; RUN: opt %s -passes=argpromotion -maxar=3 -S -o - | FileCheck %s +; PR14710 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%pair = type { i32, i32 } + +declare i8* @foo(%pair*) + +define internal void @bar(%pair* byval %Data) { +; CHECK-LABEL: define {{[^@]+}}@bar +; CHECK-SAME: (i32 [[DATA_0:%.*]], i32 [[DATA_1:%.*]]) +; CHECK-NEXT: [[DATA:%.*]] = alloca [[PAIR:%.*]], align 8 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[DATA_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1 +; CHECK-NEXT: store i32 [[DATA_1]], i32* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @foo(%pair* [[DATA]]) +; CHECK-NEXT: ret void +; + tail call i8* @foo(%pair* %Data) + ret void +} + +define void @zed(%pair* byval %Data) { +; CHECK-LABEL: define {{[^@]+}}@zed +; CHECK-SAME: (%pair* byval [[DATA:%.*]]) +; CHECK-NEXT: [[DATA_0:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 0 +; CHECK-NEXT: [[DATA_0_VAL:%.*]] = load i32, i32* [[DATA_0]], align 4 +; CHECK-NEXT: [[DATA_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1 +; CHECK-NEXT: [[DATA_1_VAL:%.*]] = load i32, i32* [[DATA_1]], align 4 +; CHECK-NEXT: call void @bar(i32 [[DATA_0_VAL]], i32 [[DATA_1_VAL]]) +; CHECK-NEXT: ret void +; + call void @bar(%pair* byval %Data) + ret void +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/thiscall.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/thiscall.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/thiscall.ll @@ 
-0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; In PR41658, argpromotion put an inalloca in a position that per the +; calling convention is passed in a register. This test verifies that +; we don't do that anymore. It also verifies that the combination of +; globalopt and argpromotion is able to optimize the call safely. +; +; RUN: opt -S -argpromotion -maxar=3 %s | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt -S -globalopt -argpromotion -maxar=3 %s | FileCheck %s --check-prefix=GLOBALOPT_ARGPROMOTION + +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-pc-windows-msvc19.11.0" + +%struct.a = type { i8 } + +define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@internalfun +; ARGPROMOTION-SAME: (%struct.a* [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca [[TMP0:%.*]]) +; ARGPROMOTION-NEXT: entry: +; ARGPROMOTION-NEXT: [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0 +; ARGPROMOTION-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4 +; ARGPROMOTION-NEXT: [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0 +; ARGPROMOTION-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* [[TMP1]], %struct.a* dereferenceable(1) [[A]]) +; ARGPROMOTION-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]]) +; ARGPROMOTION-NEXT: ret void +; +; GLOBALOPT_ARGPROMOTION-LABEL: define {{[^@]+}}@internalfun +; GLOBALOPT_ARGPROMOTION-SAME: (<{ [[STRUCT_A:%.*]] }>* [[TMP0:%.*]]) unnamed_addr +; GLOBALOPT_ARGPROMOTION-NEXT: entry: +; GLOBALOPT_ARGPROMOTION-NEXT: [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0 +; GLOBALOPT_ARGPROMOTION-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ 
[[STRUCT_A]] }>, align 4 +; GLOBALOPT_ARGPROMOTION-NEXT: [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0 +; GLOBALOPT_ARGPROMOTION-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* [[TMP1]], %struct.a* dereferenceable(1) [[A]]) +; GLOBALOPT_ARGPROMOTION-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]]) +; GLOBALOPT_ARGPROMOTION-NEXT: ret void +; +entry: + %a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0 + %argmem = alloca inalloca <{ %struct.a }>, align 4 + %1 = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %argmem, i32 0, i32 0 + %call = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* %1, %struct.a* dereferenceable(1) %a) + call void @ext(<{ %struct.a }>* inalloca %argmem) + ret void +} + +; This is here to ensure @internalfun is live. +define void @exportedfun(%struct.a* %a) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@exportedfun +; ARGPROMOTION-SAME: (%struct.a* [[A:%.*]]) +; ARGPROMOTION-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() +; ARGPROMOTION-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4 +; ARGPROMOTION-NEXT: call x86_thiscallcc void @internalfun(%struct.a* [[A]], <{ [[STRUCT_A]] }>* inalloca [[ARGMEM]]) +; ARGPROMOTION-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) +; ARGPROMOTION-NEXT: ret void +; +; GLOBALOPT_ARGPROMOTION-LABEL: define {{[^@]+}}@exportedfun +; GLOBALOPT_ARGPROMOTION-SAME: (%struct.a* [[A:%.*]]) local_unnamed_addr +; GLOBALOPT_ARGPROMOTION-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() +; GLOBALOPT_ARGPROMOTION-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4 +; GLOBALOPT_ARGPROMOTION-NEXT: call fastcc void @internalfun(<{ [[STRUCT_A]] }>* [[ARGMEM]]) +; GLOBALOPT_ARGPROMOTION-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) +; GLOBALOPT_ARGPROMOTION-NEXT: ret void +; + %inalloca.save = tail call i8* 
@llvm.stacksave() + %argmem = alloca inalloca <{ %struct.a }>, align 4 + call x86_thiscallcc void @internalfun(%struct.a* %a, <{ %struct.a }>* inalloca %argmem) + call void @llvm.stackrestore(i8* %inalloca.save) + ret void +} + +declare x86_thiscallcc %struct.a* @copy_ctor(%struct.a* returned, %struct.a* dereferenceable(1)) +declare void @ext(<{ %struct.a }>* inalloca) +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/variadic.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/variadic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-3/variadic.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=3 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=3 -S | FileCheck %s + +; Unused arguments from variadic functions cannot be eliminated as that changes +; their classification according to the SysV amd64 ABI. Clang and other frontends +; bake in the classification when they use things like byval, as in this test. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.tt0 = type { i64, i64 } +%struct.__va_list_tag = type { i32, i32, i8*, i8* } + +@t45 = internal global %struct.tt0 { i64 1335139741, i64 438042995 }, align 8 + +; Function Attrs: nounwind uwtable +define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 { +; CHECK-LABEL: define {{[^@]+}}@main +; CHECK-SAME: (i32 [[ARGC:%.*]], i8** nocapture readnone [[ARGV:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45) +; CHECK-NEXT: ret i32 0 +; +entry: + tail call void (i8*, i8*, i8*, i8*, i8*, ...)
@callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45) + ret i32 0 +} + +; Function Attrs: nounwind uwtable +define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...) { +; CHECK-LABEL: define {{[^@]+}}@callee_t0f +; CHECK-SAME: (i8* nocapture readnone [[TP13:%.*]], i8* nocapture readnone [[TP14:%.*]], i8* nocapture readnone [[TP15:%.*]], i8* nocapture readnone [[TP16:%.*]], i8* nocapture readnone [[TP17:%.*]], ...) +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + ret void +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-02-01-ReturnAttrs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-02-01-ReturnAttrs.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s + +define internal i32 @deref(i32* %x) nounwind { +; CHECK-LABEL: define {{[^@]+}}@deref +; CHECK-SAME: (i32 [[X_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[X_VAL]] +; +entry: + %tmp2 = load i32, i32* %x, align 4 + ret i32 %tmp2 +} + +define i32 @f(i32 %x) { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[X:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 +; CHECK-NEXT: [[X_ADDR_VAL:%.*]] = load i32, i32* [[X_ADDR]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]]) +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %x_addr = alloca i32 + store i32 %x, i32* %x_addr, align 4 + %tmp1 = call i32 @deref( i32* %x_addr ) nounwind + ret i32 %tmp1 +} diff --git 
a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-07-02-array-indexing.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-07-02-array-indexing.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-07-02-array-indexing.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; PR2498 + +; This test tries to convince argpromotion about promoting the load from %A + 2, +; because there is a load of %A in the entry block +define internal i32 @callee(i1 %C, i32* %A) { +; CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (i1 [[C:%.*]], i32* [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_0:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i32 [[A_0]] +; CHECK: F: +; CHECK-NEXT: [[A_2:%.*]] = getelementptr i32, i32* [[A]], i32 2 +; CHECK-NEXT: [[R:%.*]] = load i32, i32* [[A_2]] +; CHECK-NEXT: ret i32 [[R]] +; +entry: + ; Unconditionally load the element at %A + %A.0 = load i32, i32* %A + br i1 %C, label %T, label %F + +T: + ret i32 %A.0 + +F: + ; Load the element at offset two from %A. This should not be promoted!
+ %A.2 = getelementptr i32, i32* %A, i32 2 + %R = load i32, i32* %A.2 + ret i32 %R +} + +define i32 @foo() { +; CHECK-LABEL: define {{[^@]+}}@foo() +; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32* null) +; CHECK-NEXT: ret i32 [[X]] +; + %X = call i32 @callee(i1 false, i32* null) ; [#uses=1] + ret i32 %X +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-09-07-CGUpdate.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-09-07-CGUpdate.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-09-07-CGUpdate.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -inline -argpromotion -maxar=2147483647 -disable-output + +define internal fastcc i32 @hash(i32* %ts, i32 %mod) nounwind { +entry: + unreachable +} + +define void @encode(i32* %m, i32* %ts, i32* %new) nounwind { +entry: + %0 = call fastcc i32 @hash( i32* %ts, i32 0 ) nounwind ; [#uses=0] + unreachable +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-09-08-CGUpdateSelfEdge.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-09-08-CGUpdateSelfEdge.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/2008-09-08-CGUpdateSelfEdge.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -disable-output + +define internal fastcc i32 @term_SharingList(i32* %Term, i32* %List) nounwind { +entry: + br i1 false, label %bb, label %bb5 + +bb: ; preds = %entry + %0 = call fastcc i32 @term_SharingList( i32* null, i32* %List ) nounwind ; [#uses=0] + unreachable + +bb5: ; preds = %entry + ret i32 0 +} + +define i32 @term_Sharing(i32* %Term) nounwind { 
+entry: + br i1 false, label %bb.i, label %bb14 + +bb.i: ; preds = %entry + %0 = call fastcc i32 @term_SharingList( i32* null, i32* null ) nounwind ; [#uses=0] + ret i32 1 + +bb14: ; preds = %entry + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/aggregate-promote.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/aggregate-promote.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/aggregate-promote.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s + +%T = type { i32, i32, i32, i32 } +@G = constant %T { i32 0, i32 0, i32 17, i32 25 } + +define internal i32 @test(%T* %p) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32 [[P_0_2_VAL:%.*]], i32 [[P_0_3_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = add i32 [[P_0_3_VAL]], [[P_0_2_VAL]] +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 + %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 + %a = load i32, i32* %a.gep + %b = load i32, i32* %b.gep + %v = add i32 %a, %b + ret i32 %v +} + +define i32 @caller() { +; CHECK-LABEL: define {{[^@]+}}@caller() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G_IDX:%.*]] = getelementptr [[T:%.*]], %T* @G, i64 0, i32 2 +; CHECK-NEXT: [[G_IDX_VAL:%.*]] = load i32, i32* [[G_IDX]] +; CHECK-NEXT: [[G_IDX1:%.*]] = getelementptr [[T]], %T* @G, i64 0, i32 3 +; CHECK-NEXT: [[G_IDX1_VAL:%.*]] = load i32, i32* [[G_IDX1]] +; CHECK-NEXT: [[V:%.*]] = call i32 @test(i32 [[G_IDX_VAL]], i32 [[G_IDX1_VAL]]) +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %v = call i32 @test(%T* @G) + ret i32 %v +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/attributes.ll 
b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/attributes.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/attributes.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S -argpromotion -maxar=2147483647 < %s | FileCheck %s +; RUN: opt -S -passes=argpromotion -maxar=2147483647 < %s | FileCheck %s +; Test that we only promote arguments when the caller/callee have compatible +; function attributes. + +target triple = "x86_64-unknown-linux-gnu" + +define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { +; CHECK-LABEL: define {{[^@]+}}@no_promote_avx2 +; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64>* readonly [[ARG1:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]] +; CHECK-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <4 x i64>, <4 x i64>* %arg1 + store <4 x i64> %tmp, <4 x i64>* %arg + ret void +} + +define void @no_promote(<4 x i64>* %arg) #1 { +; CHECK-LABEL: define {{[^@]+}}@no_promote +; CHECK-SAME: (<4 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* [[TMP2]], <4 x i64>* [[TMP]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <4 x i64>, align 32 + %tmp2 = alloca <4 x i64>, align 32 + %tmp3 = bitcast <4 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1
false) + call fastcc void @no_promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp) + %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32 + store <4 x i64> %tmp4, <4 x i64>* %arg, align 2 + ret void +} + +define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { +; CHECK-LABEL: define {{[^@]+}}@promote_avx2 +; CHECK-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <4 x i64> [[ARG1_VAL]], <4 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <4 x i64>, <4 x i64>* %arg1 + store <4 x i64> %tmp, <4 x i64>* %arg + ret void +} + +define void @promote(<4 x i64>* %arg) #0 { +; CHECK-LABEL: define {{[^@]+}}@promote +; CHECK-SAME: (<4 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @promote_avx2(<4 x i64>* [[TMP2]], <4 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <4 x i64>, align 32 + %tmp2 = alloca <4 x i64>, align 32 + %tmp3 = bitcast <4 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp) + %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32 + store <4 x i64> %tmp4, <4 x i64>* %arg, align 2 + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #2 + +attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" } +attributes #1 = { nounwind uwtable } +attributes 
#2 = { argmemonly nounwind } diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/attrs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/attrs.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s + +%struct.ss = type { i32, i64 } + +; Don't drop 'byval' on %X here. +define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]], i32 [[I:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; CHECK-NEXT: store i32 0, i32* [[X]], align 4 +; CHECK-NEXT: ret void +; +entry: + + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = add i32 %tmp1, 1 + store i32 %tmp2, i32* %tmp, align 4 + + store i32 0, i32* %X + ret void +} + +; Also make sure we don't drop the call zeroext attribute. 
+define i32 @test(i32* %X) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32* [[X:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 +; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 +; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X]], i32 zeroext 0) +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca %struct.ss + %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + store i32 1, i32* %tmp1, align 8 + %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i64 2, i64* %tmp4, align 4 + + call void @f( %struct.ss* byval %S, i32* byval %X, i32 zeroext 0) + + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/basictest.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/basictest.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/basictest.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -basicaa -argpromotion -maxar=2147483647 -mem2reg -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +define internal i32 @test(i32* %X, i32* %Y) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 
[[Y_VAL:%.*]]) +; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]] +; CHECK-NEXT: ret i32 [[C]] +; + %A = load i32, i32* %X + %B = load i32, i32* %Y + %C = add i32 %A, %B + ret i32 %C +} + +define internal i32 @caller(i32* %B) { +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (i32 [[B_VAL1:%.*]]) +; CHECK-NEXT: [[C:%.*]] = call i32 @test(i32 1, i32 [[B_VAL1]]) +; CHECK-NEXT: ret i32 [[C]] +; + %A = alloca i32 + store i32 1, i32* %A + %C = call i32 @test(i32* %A, i32* %B) + ret i32 %C +} + +define i32 @callercaller() { +; CHECK-LABEL: define {{[^@]+}}@callercaller() +; CHECK-NEXT: [[X:%.*]] = call i32 @caller(i32 2) +; CHECK-NEXT: ret i32 [[X]] +; + %B = alloca i32 + store i32 2, i32* %B + %X = call i32 @caller(i32* %B) + ret i32 %X +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/byval-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/byval-2.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s + +; Arg promotion eliminates the struct argument. +; FIXME: We should eliminate the i32* argument. 
+ +%struct.ss = type { i32, i64 } + +define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; CHECK-NEXT: store i32 0, i32* [[X]], align 4 +; CHECK-NEXT: ret void +; +entry: + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = add i32 %tmp1, 1 + store i32 %tmp2, i32* %tmp, align 4 + + store i32 0, i32* %X + ret void +} + +define i32 @test(i32* %X) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32* [[X:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 +; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 +; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval 
[[X]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca %struct.ss + %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + store i32 1, i32* %tmp1, align 8 + %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i64 2, i64* %tmp4, align 4 + call void @f( %struct.ss* byval %S, i32* byval %X) + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/byval.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/byval.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/byval.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s + +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +%struct.ss = type { i32, i64 } + +define internal void @f(%struct.ss* byval %b) nounwind { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; CHECK-NEXT: ret void +; +entry: + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = add 
i32 %tmp1, 1 + store i32 %tmp2, i32* %tmp, align 4 + ret void +} + + +define internal void @g(%struct.ss* byval align 32 %b) nounwind { +; CHECK-LABEL: define {{[^@]+}}@g +; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 +; CHECK-NEXT: ret void +; +entry: + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %tmp1 = load i32, i32* %tmp, align 4 + %tmp2 = add i32 %tmp1, 1 + store i32 %tmp2, i32* %tmp, align 4 + ret void +} + + +define i32 @main() nounwind { +; CHECK-LABEL: define {{[^@]+}}@main() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 +; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 +; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 +; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]]) +; CHECK-NEXT: [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; CHECK-NEXT: 
[[S_01_VAL:%.*]] = load i32, i32* [[S_01]], align 4 +; CHECK-NEXT: [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[S_12_VAL:%.*]] = load i64, i64* [[S_12]], align 4 +; CHECK-NEXT: call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca %struct.ss + %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + store i32 1, i32* %tmp1, align 8 + %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i64 2, i64* %tmp4, align 4 + call void @f(%struct.ss* byval %S) nounwind + call void @g(%struct.ss* byval %S) nounwind + ret i32 0 +} + + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/chained.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/chained.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/chained.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s + +@G1 = constant i32 0 +@G2 = constant i32* @G1 + +define internal i32 @test(i32** %x) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[X_VAL_VAL]] +; +entry: + %y = load i32*, i32** %x + %z = load i32, i32* %y + ret i32 %z +} + +define i32 @caller() { +; CHECK-LABEL: define {{[^@]+}}@caller() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G2_VAL:%.*]] = load i32*, i32** @G2 +; CHECK-NEXT: [[G2_VAL_VAL:%.*]] = load i32, i32* [[G2_VAL]] +; CHECK-NEXT: [[X:%.*]] = call i32 @test(i32 [[G2_VAL_VAL]]) +; CHECK-NEXT: ret i32 [[X]] +; +entry: + %x = call i32 @test(i32** @G2) + ret i32 %x +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/control-flow.ll 
b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/control-flow.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/control-flow.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s + +; Don't promote around control flow. +define internal i32 @callee(i1 %C, i32* %P) { +; CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (i1 [[C:%.*]], i32* [[P:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i32 17 +; CHECK: F: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]] +; CHECK-NEXT: ret i32 [[X]] +; +entry: + br i1 %C, label %T, label %F + +T: + ret i32 17 + +F: + %X = load i32, i32* %P + ret i32 %X +} + +define i32 @foo() { +; CHECK-LABEL: define {{[^@]+}}@foo() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 true, i32* null) +; CHECK-NEXT: ret i32 [[X]] +; +entry: + %X = call i32 @callee(i1 true, i32* null) + ret i32 %X +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/control-flow2.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/control-flow2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/control-flow2.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s + +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +define internal i32 @callee(i1 %C, i32* %P) { +; 
CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (i1 [[C:%.*]], i32 [[P_VAL:%.*]]) +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i32 17 +; CHECK: F: +; CHECK-NEXT: ret i32 [[P_VAL]] +; + br i1 %C, label %T, label %F + +T: ; preds = %0 + ret i32 17 + +F: ; preds = %0 + %X = load i32, i32* %P ; [#uses=1] + ret i32 %X +} + +define i32 @foo() { +; CHECK-LABEL: define {{[^@]+}}@foo() +; CHECK-NEXT: [[A:%.*]] = alloca i32 +; CHECK-NEXT: store i32 17, i32* [[A]] +; CHECK-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32 [[A_VAL]]) +; CHECK-NEXT: ret i32 [[X]] +; + %A = alloca i32 ; [#uses=2] + store i32 17, i32* %A + %X = call i32 @callee( i1 false, i32* %A ) ; [#uses=1] + ret i32 %X +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/crash.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/crash.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/crash.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S < %s -inline -argpromotion -maxar=2147483647 | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_OLDPM +; RUN: opt -S < %s -passes=inline,argpromotion -maxar=2147483647 | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_NEWPM + +%S = type { %S* } + +; Inlining should nuke the invoke (and any inlined calls) here even with +; argument promotion running along with it. 
+define void @zot() personality i32 (...)* @wibble { +; ARGPROMOTION-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: unreachable +; ARGPROMOTION: hoge.exit: +; ARGPROMOTION-NEXT: br label [[BB1:%.*]] +; ARGPROMOTION: bb1: +; ARGPROMOTION-NEXT: unreachable +; ARGPROMOTION: bb2: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } +; ARGPROMOTION-NEXT: cleanup +; ARGPROMOTION-NEXT: unreachable +; +bb: + invoke void @hoge() + to label %bb1 unwind label %bb2 + +bb1: + unreachable + +bb2: + %tmp = landingpad { i8*, i32 } + cleanup + unreachable +} + +define internal void @hoge() { +bb: + %tmp = call fastcc i8* @spam(i1 (i8*)* @eggs) + %tmp1 = call fastcc i8* @spam(i1 (i8*)* @barney) + unreachable +} + +define internal fastcc i8* @spam(i1 (i8*)* %arg) { +bb: + unreachable +} + +define internal i1 @eggs(i8* %arg) { +; ALL_NEWPM-LABEL: define {{[^@]+}}@eggs() +; ALL_NEWPM-NEXT: bb: +; ALL_NEWPM-NEXT: unreachable +; +bb: + %tmp = call zeroext i1 @barney(i8* %arg) + unreachable +} + +define internal i1 @barney(i8* %arg) { +bb: + ret i1 undef +} + +define i32 @test_inf_promote_caller(i32 %arg) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_caller +; ARGPROMOTION-SAME: (i32 [[ARG:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] +; ARGPROMOTION-NEXT: [[TMP1:%.*]] = alloca [[S]] +; ARGPROMOTION-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) +; ARGPROMOTION-NEXT: ret i32 0 +; +bb: + %tmp = alloca %S + %tmp1 = alloca %S + %tmp2 = call i32 @test_inf_promote_callee(%S* %tmp, %S* %tmp1) + + ret i32 0 +} + +define internal i32 @test_inf_promote_callee(%S* %arg, %S* %arg1) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_callee +; ARGPROMOTION-SAME: (%S* [[ARG:%.*]], %S* [[ARG1:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0 +; ARGPROMOTION-NEXT: 
[[TMP2:%.*]] = load %S*, %S** [[TMP]] +; ARGPROMOTION-NEXT: [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0 +; ARGPROMOTION-NEXT: [[TMP4:%.*]] = load %S*, %S** [[TMP3]] +; ARGPROMOTION-NEXT: [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]]) +; ARGPROMOTION-NEXT: ret i32 0 +; +bb: + %tmp = getelementptr %S, %S* %arg1, i32 0, i32 0 + %tmp2 = load %S*, %S** %tmp + %tmp3 = getelementptr %S, %S* %arg, i32 0, i32 0 + %tmp4 = load %S*, %S** %tmp3 + %tmp5 = call i32 @test_inf_promote_callee(%S* %tmp4, %S* %tmp2) + + ret i32 0 +} + +declare i32 @wibble(...) diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/dbg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/dbg.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s + +declare void @sink(i32) + +define internal void @test(i32** %X) !dbg !2 { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (i32 [[X_VAL_VAL:%.*]]) !dbg !3 +; CHECK-NEXT: call void @sink(i32 [[X_VAL_VAL]]) +; CHECK-NEXT: ret void +; + %1 = load i32*, i32** %X, align 8 + %2 = load i32, i32* %1, align 8 + call void @sink(i32 %2) + ret void +} + +%struct.pair = type { i32, i32 } + +define internal void @test_byval(%struct.pair* byval %P) { +; CHECK-LABEL: define {{[^@]+}}@test_byval +; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) +; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 8 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[P_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 
1 +; CHECK-NEXT: store i32 [[P_1]], i32* [[DOT1]], align 4 +; CHECK-NEXT: ret void +; + ret void +} + +define void @caller(i32** %Y, %struct.pair* %P) { +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (i32** [[Y:%.*]], %struct.pair* [[P:%.*]]) +; CHECK-NEXT: [[Y_VAL:%.*]] = load i32*, i32** [[Y]], align 8, !dbg !4 +; CHECK-NEXT: [[Y_VAL_VAL:%.*]] = load i32, i32* [[Y_VAL]], align 8, !dbg !4 +; CHECK-NEXT: call void @test(i32 [[Y_VAL_VAL]]), !dbg !4 +; CHECK-NEXT: [[P_0:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 0, !dbg !5 +; CHECK-NEXT: [[P_0_VAL:%.*]] = load i32, i32* [[P_0]], align 4, !dbg !5 +; CHECK-NEXT: [[P_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1, !dbg !5 +; CHECK-NEXT: [[P_1_VAL:%.*]] = load i32, i32* [[P_1]], align 4, !dbg !5 +; CHECK-NEXT: call void @test_byval(i32 [[P_0_VAL]], i32 [[P_1_VAL]]), !dbg !5 +; CHECK-NEXT: ret void +; + call void @test(i32** %Y), !dbg !1 + + call void @test_byval(%struct.pair* %P), !dbg !6 + ret void +} + + +!llvm.module.flags = !{!0} +!llvm.dbg.cu = !{!3} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !DILocation(line: 8, scope: !2) +!2 = distinct !DISubprogram(name: "test", file: !5, line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, scopeLine: 3, scope: null) +!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: LineTablesOnly, file: !5) +!5 = !DIFile(filename: "test.c", directory: "") +!6 = !DILocation(line: 9, scope: !2) diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/fp80.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/fp80.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: 
--function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%union.u = type { x86_fp80 } +%struct.s = type { double, i16, i8, [5 x i8] } + +@b = internal global %struct.s { double 3.14, i16 9439, i8 25, [5 x i8] undef }, align 16 + +%struct.Foo = type { i32, i64 } +@a = internal global %struct.Foo { i32 1, i64 2 }, align 8 + +define void @run() { +; CHECK-LABEL: define {{[^@]+}}@run() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0 +; CHECK-NEXT: [[DOT0_VAL:%.*]] = load x86_fp80, x86_fp80* [[DOT0]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* @a) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* @a) +; CHECK-NEXT: ret void +; +entry: + tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) + tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) + call i64 @AccessPaddingOfStruct(%struct.Foo* @a) + call i64 @CaptureAStruct(%struct.Foo* @a) + ret void +} + +define internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) { +; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely +; CHECK-SAME: (%union.u* byval align 16 [[ARG:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast %union.u* [[ARG]] to %struct.s* +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.s* [[BITCAST]], i64 0, i32 2 +; CHECK-NEXT: [[RESULT:%.*]] = load i8, i8* [[GEP]] +; 
CHECK-NEXT: ret i8 [[RESULT]] +; +entry: + %bitcast = bitcast %union.u* %arg to %struct.s* + %gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2 + %result = load i8, i8* %gep + ret i8 %result +} + +define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %arg) { +; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafely +; CHECK-SAME: (x86_fp80 [[ARG_0:%.*]]) +; CHECK-NEXT: [[ARG:%.*]] = alloca [[UNION_U:%.*]], align 16 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U]], %union.u* [[ARG]], i32 0, i32 0 +; CHECK-NEXT: store x86_fp80 [[ARG_0]], x86_fp80* [[DOT0]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], %union.u* [[ARG]], i64 0, i32 0 +; CHECK-NEXT: [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]] +; CHECK-NEXT: ret x86_fp80 [[FP80]] +; + %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0 + %fp80 = load x86_fp80, x86_fp80* %gep + ret x86_fp80 %fp80 +} + +define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) { +; CHECK-LABEL: define {{[^@]+}}@AccessPaddingOfStruct +; CHECK-SAME: (%struct.Foo* byval [[A:%.*]]) +; CHECK-NEXT: [[P:%.*]] = bitcast %struct.Foo* [[A]] to i64* +; CHECK-NEXT: [[V:%.*]] = load i64, i64* [[P]] +; CHECK-NEXT: ret i64 [[V]] +; + %p = bitcast %struct.Foo* %a to i64* + %v = load i64, i64* %p + ret i64 %v +} + +define internal i64 @CaptureAStruct(%struct.Foo* byval %a) { +; CHECK-LABEL: define {{[^@]+}}@CaptureAStruct +; CHECK-SAME: (%struct.Foo* byval [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_PTR:%.*]] = alloca %struct.Foo* +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi %struct.Foo* [ [[A]], [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ] +; CHECK-NEXT: store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]] +; CHECK-NEXT: [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0 +; CHECK-NEXT: br label [[LOOP]] +; 
+entry: + %a_ptr = alloca %struct.Foo* + br label %loop + +loop: + %phi = phi %struct.Foo* [ null, %entry ], [ %gep, %loop ] + %0 = phi %struct.Foo* [ %a, %entry ], [ %0, %loop ] + store %struct.Foo* %phi, %struct.Foo** %a_ptr + %gep = getelementptr %struct.Foo, %struct.Foo* %a, i64 0 + br label %loop +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/inalloca.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/inalloca.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/inalloca.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt %s -globalopt -argpromotion -maxar=2147483647 -sroa -S | FileCheck %s +; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -maxar=2147483647 -S | FileCheck %s + +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +%struct.ss = type { i32, i32 } + +; Argpromote + sroa should change this to passing the two integers by value. 
+define internal i32 @f(%struct.ss* inalloca %s) { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (i32 [[S_0_0_VAL:%.*]], i32 [[S_0_1_VAL:%.*]]) unnamed_addr +; CHECK-NEXT: entry: +; CHECK-NEXT: [[R:%.*]] = add i32 [[S_0_0_VAL]], [[S_0_1_VAL]] +; CHECK-NEXT: ret i32 [[R]] +; +entry: + %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0 + %f1 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 1 + %a = load i32, i32* %f0, align 4 + %b = load i32, i32* %f1, align 4 + %r = add i32 %a, %b + ret i32 %r +} + +define i32 @main() { +; CHECK-LABEL: define {{[^@]+}}@main() local_unnamed_addr +; CHECK-NEXT: entry: +; CHECK-NEXT: [[R:%.*]] = call fastcc i32 @f(i32 1, i32 2) +; CHECK-NEXT: ret i32 [[R]] +; +entry: + %S = alloca inalloca %struct.ss + %f0 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + %f1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i32 1, i32* %f0, align 4 + store i32 2, i32* %f1, align 4 + %r = call i32 @f(%struct.ss* inalloca %S) + ret i32 %r +} + +; Argpromote can't promote %a because of the icmp use. 
+define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind { +; CHECK-LABEL: define {{[^@]+}}@g +; CHECK-SAME: (%struct.ss* [[A:%.*]], %struct.ss* [[B:%.*]]) unnamed_addr +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = icmp eq %struct.ss* [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; +entry: + %c = icmp eq %struct.ss* %a, %b + ret i1 %c +} + +define i32 @test() { +; CHECK-LABEL: define {{[^@]+}}@test() local_unnamed_addr +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca inalloca [[STRUCT_SS:%.*]] +; CHECK-NEXT: [[C:%.*]] = call fastcc i1 @g(%struct.ss* [[S]], %struct.ss* [[S]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca inalloca %struct.ss + %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/invalidation.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/invalidation.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/invalidation.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; Check that when argument promotion changes a function in some parent node of +; the call graph, any analyses that happened to be cached for that function are +; actually invalidated. We are using `demanded-bits` here because when printed +; it will end up caching a value for every instruction, making it easy to +; detect the instruction-level changes that will fail here. With improper +; invalidation this will crash in the second printer as it tries to reuse +; now-invalid demanded bits. 
+; +; RUN: opt < %s -passes='function(print),cgscc(argpromotion,function(print))' -maxar=2147483647 -S | FileCheck %s + +@G = constant i32 0 + +define internal i32 @a(i32* %x) { +; CHECK-LABEL: define {{[^@]+}}@a +; CHECK-SAME: (i32 [[X_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[X_VAL]] +; +entry: + %v = load i32, i32* %x + ret i32 %v +} + +define i32 @b() { +; CHECK-LABEL: define {{[^@]+}}@b() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G_VAL:%.*]] = load i32, i32* @G +; CHECK-NEXT: [[V:%.*]] = call i32 @a(i32 [[G_VAL]]) +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %v = call i32 @a(i32* @G) + ret i32 %v +} + +define i32 @c() { +; CHECK-LABEL: define {{[^@]+}}@c() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[G_VAL:%.*]] = load i32, i32* @G +; CHECK-NEXT: [[V1:%.*]] = call i32 @a(i32 [[G_VAL]]) +; CHECK-NEXT: [[V2:%.*]] = call i32 @b() +; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[V1]], [[V2]] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %v1 = call i32 @a(i32* @G) + %v2 = call i32 @b() + %result = add i32 %v1, %v2 + ret i32 %result +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/lit.local.cfg b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/lit.local.cfg new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'X86' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/min-legal-vector-width.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/min-legal-vector-width.ll @@ -0,0 +1,387 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S -argpromotion -maxar=2147483647 < %s | FileCheck %s +; RUN: opt -S -passes=argpromotion 
-maxar=2147483647 < %s | FileCheck %s +; Test that we only promote arguments when the caller/callee have compatible +; function attributes. + +target triple = "x86_64-unknown-linux-gnu" + +; This should promote +define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 +
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should promote +define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should promote +define internal fastcc void 
@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should promote +define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { +; 
CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should not promote +define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* 
[[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]] +; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should not promote +define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* 
[[ARG1]] +; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 { +; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should promote +define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define 
void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 { +; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; This should promote +define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 { +; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] +; CHECK-NEXT: ret void +; +bb: + %tmp = load <8 x i64>, <8 x i64>* %arg1 + store <8 x i64> %tmp, <8 x i64>* %arg + ret void +} + +define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 { +; CHECK-LABEL: define 
{{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; CHECK-SAME: (<8 x i64>* [[ARG:%.*]]) +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca <8 x i64>, align 32 + %tmp2 = alloca <8 x i64>, align 32 + %tmp3 = bitcast <8 x i64>* %tmp to i8* + call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) + call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) + %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 + store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 + ret void +} + +; If the arguments are scalar, it's ok to promote. 
+define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %X, i32* %Y) #2 { +; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]]) +; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]] +; CHECK-NEXT: ret i32 [[C]] +; + %A = load i32, i32* %X + %B = load i32, i32* %Y + %C = add i32 %A, %B + ret i32 %C +} + +define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %B) #2 { +; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CHECK-SAME: (i32* [[B:%.*]]) +; CHECK-NEXT: [[A:%.*]] = alloca i32 +; CHECK-NEXT: store i32 1, i32* [[A]] +; CHECK-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[B_VAL:%.*]] = load i32, i32* [[B]] +; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]]) +; CHECK-NEXT: ret i32 [[C]] +; + %A = alloca i32 + store i32 1, i32* %A + %C = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32* %A, i32* %B) + ret i32 %C +} + +; If the arguments are scalar, it's ok to promote. 
+define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %X, i32* %Y) #2 { +; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]]) +; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]] +; CHECK-NEXT: ret i32 [[C]] +; + %A = load i32, i32* %X + %B = load i32, i32* %Y + %C = add i32 %A, %B + ret i32 %C +} + +define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %B) #2 { +; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CHECK-SAME: (i32* [[B:%.*]]) +; CHECK-NEXT: [[A:%.*]] = alloca i32 +; CHECK-NEXT: store i32 1, i32* [[A]] +; CHECK-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[B_VAL:%.*]] = load i32, i32* [[B]] +; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]]) +; CHECK-NEXT: ret i32 [[C]] +; + %A = alloca i32 + store i32 1, i32* %A + %C = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32* %A, i32* %B) + ret i32 %C +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5 + +attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" } +attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" } +attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" } +attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" } +attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" 
"min-legal-vector-width"="256" "prefer-vector-width"="256" } +attributes #5 = { argmemonly nounwind } diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/musttail.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/musttail.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/musttail.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; PR36543 + +; Don't promote arguments of musttail callee + +%T = type { i32, i32, i32, i32 } + +define internal i32 @test(%T* %p) { +; CHECK-LABEL: define {{[^@]+}}@test +; CHECK-SAME: (%T* [[P:%.*]]) +; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3 +; CHECK-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2 +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]] +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]] +; CHECK-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: ret i32 [[V]] +; + %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 + %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 + %a = load i32, i32* %a.gep + %b = load i32, i32* %b.gep + %v = add i32 %a, %b + ret i32 %v +} + +define i32 @caller(%T* %p) { +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (%T* [[P:%.*]]) +; CHECK-NEXT: [[V:%.*]] = musttail call i32 @test(%T* [[P]]) +; CHECK-NEXT: ret i32 [[V]] +; + %v = musttail call i32 @test(%T* %p) + ret i32 %v +} + +; Don't promote arguments of musttail caller + +define i32 @foo(%T* %p, i32 %v) { +; CHECK-LABEL: define {{[^@]+}}@foo +; CHECK-SAME: (%T* [[P:%.*]], i32 [[V:%.*]]) +; CHECK-NEXT: ret i32 0 +; + ret i32 0 +} + +define internal i32 @test2(%T* %p, i32 %p2) { +; CHECK-LABEL: define {{[^@]+}}@test2 +; CHECK-SAME: (%T* [[P:%.*]], i32 [[P2:%.*]]) +; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* 
[[P]], i64 0, i32 3 +; CHECK-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2 +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]] +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]] +; CHECK-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: [[CA:%.*]] = musttail call i32 @foo(%T* undef, i32 [[V]]) +; CHECK-NEXT: ret i32 [[CA]] +; + %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 + %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 + %a = load i32, i32* %a.gep + %b = load i32, i32* %b.gep + %v = add i32 %a, %b + %ca = musttail call i32 @foo(%T* undef, i32 %v) + ret i32 %ca +} + +define i32 @caller2(%T* %g) { +; CHECK-LABEL: define {{[^@]+}}@caller2 +; CHECK-SAME: (%T* [[G:%.*]]) +; CHECK-NEXT: [[V:%.*]] = call i32 @test2(%T* [[G]], i32 0) +; CHECK-NEXT: ret i32 [[V]] +; + %v = call i32 @test2(%T* %g, i32 0) + ret i32 %v +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/naked_functions.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/naked_functions.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/naked_functions.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s + +; Don't promote parameters of/arguments to naked functions + +@g = common global i32 0, align 4 + +define i32 @bar() { +; CHECK-LABEL: define {{[^@]+}}@bar() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo(i32* @g) +; CHECK-NEXT: ret i32 [[CALL]] +; +entry: + %call = call i32 @foo(i32* @g) + ret i32 %call +} + +define internal i32 @foo(i32*) #0 { +; CHECK-LABEL: define {{[^@]+}}@foo +; CHECK-SAME: (i32* [[TMP0:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() +; CHECK-NEXT: unreachable +; +entry: + 
%retval = alloca i32, align 4 + call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() + unreachable +} + + +attributes #0 = { naked } diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/nonzero-address-spaces.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/nonzero-address-spaces.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/nonzero-address-spaces.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s + +; ArgumentPromotion should preserve the default function address space +; from the data layout. + +target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8" + +@g = common global i32 0, align 4 + +define i32 @bar() { +; CHECK-LABEL: define {{[^@]+}}@bar() addrspace(1) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call addrspace(1) i32 @foo() +; CHECK-NEXT: ret i32 [[CALL]] +; + +entry: + %call = call i32 @foo(i32* @g) + ret i32 %call +} + +define internal i32 @foo(i32*) { +; CHECK-LABEL: define {{[^@]+}}@foo() addrspace(1) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() +; CHECK-NEXT: unreachable +; +entry: + %retval = alloca i32, align 4 + call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() + unreachable +} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr27568.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr27568.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr27568.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S -argpromotion 
-maxar=2147483647 < %s | FileCheck %s +; RUN: opt -S -passes=argpromotion -maxar=2147483647 < %s | FileCheck %s +; RUN: opt -S -debugify -maxar=2147483647 -o /dev/null < %s +target triple = "x86_64-pc-windows-msvc" + +define internal void @callee(i8*) { +; CHECK-LABEL: define {{[^@]+}}@callee() +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @thunk() +; CHECK-NEXT: ret void +; +entry: + call void @thunk() + ret void +} + +define void @test1() personality i32 (...)* @__CxxFrameHandler3 { +; CHECK-LABEL: define {{[^@]+}}@test1() personality i32 (...)* @__CxxFrameHandler3 +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @thunk() +; CHECK-NEXT: to label [[OUT:%.*]] unwind label [[CPAD:%.*]] +; CHECK: out: +; CHECK-NEXT: ret void +; CHECK: cpad: +; CHECK-NEXT: [[PAD:%.*]] = cleanuppad within none [] +; CHECK-NEXT: call void @callee() [ "funclet"(token [[PAD]]) ] +; CHECK-NEXT: cleanupret from [[PAD]] unwind to caller +; +entry: + invoke void @thunk() + to label %out unwind label %cpad + +out: + ret void + +cpad: + %pad = cleanuppad within none [] + call void @callee(i8* null) [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +} + + +declare void @thunk() + +declare i32 @__CxxFrameHandler3(...) 
diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr32917.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr32917.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr32917.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; PR 32917 + +@b = common local_unnamed_addr global i32 0, align 4 +@a = common local_unnamed_addr global i32 0, align 4 + +define i32 @fn2() local_unnamed_addr { +; CHECK-LABEL: define {{[^@]+}}@fn2() local_unnamed_addr +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32* +; CHECK-NEXT: [[DOTIDX:%.*]] = getelementptr i32, i32* [[TMP3]], i64 -1 +; CHECK-NEXT: [[DOTIDX_VAL:%.*]] = load i32, i32* [[DOTIDX]], align 4 +; CHECK-NEXT: call fastcc void @fn1(i32 [[DOTIDX_VAL]]) +; CHECK-NEXT: ret i32 undef +; + %1 = load i32, i32* @b, align 4 + %2 = sext i32 %1 to i64 + %3 = inttoptr i64 %2 to i32* + call fastcc void @fn1(i32* %3) + ret i32 undef +} + +define internal fastcc void @fn1(i32* nocapture readonly) unnamed_addr { +; CHECK-LABEL: define {{[^@]+}}@fn1 +; CHECK-SAME: (i32 [[DOT18446744073709551615_VAL:%.*]]) unnamed_addr +; CHECK-NEXT: store i32 [[DOT18446744073709551615_VAL]], i32* @a, align 4 +; CHECK-NEXT: ret void +; + %2 = getelementptr inbounds i32, i32* %0, i64 -1 + %3 = load i32, i32* %2, align 4 + store i32 %3, i32* @a, align 4 + ret void +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr33641_remove_arg_dbgvalue.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/pr33641_remove_arg_dbgvalue.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -argpromotion -maxar=2147483647 -verify -dse -S %s -o - | FileCheck %s + +; Fix for PR33641. ArgumentPromotion removed the argument to bar but left the call to +; dbg.value which still used the removed argument. + +; The %p argument should be removed, and the use of it in dbg.value should be +; changed to undef. + +%p_t = type i16* +%fun_t = type void (%p_t)* + +define void @foo() { +; CHECK-LABEL: define {{[^@]+}}@foo() +; CHECK-NEXT: ret void +; + %tmp = alloca %fun_t + store %fun_t @bar, %fun_t* %tmp + ret void +} + +define internal void @bar(%p_t %p) { +; CHECK-LABEL: define {{[^@]+}}@bar() +; CHECK-NEXT: call void @llvm.dbg.value(metadata i16* undef, metadata !3, metadata !DIExpression()), !dbg !5 +; CHECK-NEXT: ret void +; + call void @llvm.dbg.value(metadata %p_t %p, metadata !4, metadata !5), !dbg !6 + ret void +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1) +!1 = !DIFile(filename: "test.c", directory: "") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "bar", unit: !0) +!4 = !DILocalVariable(name: "p", scope: !3) +!5 = !DIExpression() +!6 = !DILocation(line: 1, column: 1, scope: !3) diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/profile.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/profile.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/profile.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -argpromotion -maxar=2147483647 
-mem2reg -S < %s | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +; Checks if !prof metadata is correct in deadargelim. + +define void @caller() #0 { +; CHECK-LABEL: define {{[^@]+}}@caller() +; CHECK-NEXT: call void @promote_i32_ptr(i32 42), !prof !0 +; CHECK-NEXT: ret void +; + %x = alloca i32 + store i32 42, i32* %x + call void @promote_i32_ptr(i32* %x), !prof !0 + ret void +} + +define internal void @promote_i32_ptr(i32* %xp) { +; CHECK-LABEL: define {{[^@]+}}@promote_i32_ptr +; CHECK-SAME: (i32 [[XP_VAL:%.*]]) +; CHECK-NEXT: call void @use_i32(i32 [[XP_VAL]]) +; CHECK-NEXT: ret void +; + %x = load i32, i32* %xp + call void @use_i32(i32 %x) + ret void +} + +declare void @use_i32(i32) + +!0 = !{!"branch_weights", i32 30} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/reserve-tbaa.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/reserve-tbaa.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/reserve-tbaa.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s + +; PR17906 +; When we promote two arguments in a single function with different types, +; before the fix, we used the same tag for the newly-created two loads. +; This testing case makes sure that we correctly transfer the tbaa tags from the +; original loads to the newly-created loads when promoting pointer arguments. 
+ +@a = global i32* null, align 8 +@e = global i32** @a, align 8 +@g = global i32 0, align 4 +@c = global i64 0, align 8 +@d = global i8 0, align 1 + +define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) { +; CHECK-LABEL: define {{[^@]+}}@fn +; CHECK-SAME: (i32 [[P1_VAL:%.*]], i64 [[P2_VAL:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[P2_VAL]] to i32 +; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[P1_VAL]] to i8 +; CHECK-NEXT: store i8 [[CONV1]], i8* @d, align 1, !tbaa !0 +; CHECK-NEXT: ret void +; +entry: + %0 = load i64, i64* %p2, align 8, !tbaa !1 + %conv = trunc i64 %0 to i32 + %1 = load i32, i32* %p1, align 4, !tbaa !5 + %conv1 = trunc i32 %1 to i8 + store i8 %conv1, i8* @d, align 1, !tbaa !7 + ret void +} + +define i32 @main() { +; CHECK-LABEL: define {{[^@]+}}@main() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32**, i32*** @e, align 8, !tbaa !3 +; CHECK-NEXT: store i32* @g, i32** [[TMP0]], align 8, !tbaa !3 +; CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** @a, align 8, !tbaa !3 +; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 4, !tbaa !5 +; CHECK-NEXT: [[G_VAL:%.*]] = load i32, i32* @g, align 4, !tbaa !5 +; CHECK-NEXT: [[C_VAL:%.*]] = load i64, i64* @c, align 8, !tbaa !7 +; CHECK-NEXT: call fastcc void @fn(i32 [[G_VAL]], i64 [[C_VAL]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %0 = load i32**, i32*** @e, align 8, !tbaa !8 + store i32* @g, i32** %0, align 8, !tbaa !8 + %1 = load i32*, i32** @a, align 8, !tbaa !8 + store i32 1, i32* %1, align 4, !tbaa !5 + call fastcc void @fn(i32* @g, i64* @c) + + ret i32 0 +} + +!1 = !{!2, !2, i64 0} +!2 = !{!"long", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} +!5 = !{!6, !6, i64 0} +!6 = !{!"int", !3, i64 0} +!7 = !{!3, !3, i64 0} +!8 = !{!9, !9, i64 0} +!9 = !{!"any pointer", !3, i64 0} + diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/sret.ll 
b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/sret.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/sret.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -argpromotion -maxar=2147483647 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s + +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +define internal void @add({i32, i32}* %this, i32* sret %r) { +; CHECK-LABEL: define {{[^@]+}}@add +; CHECK-SAME: (i32 [[THIS_0_0_VAL:%.*]], i32 [[THIS_0_1_VAL:%.*]], i32* noalias [[R:%.*]]) +; CHECK-NEXT: [[AB:%.*]] = add i32 [[THIS_0_0_VAL]], [[THIS_0_1_VAL]] +; CHECK-NEXT: store i32 [[AB]], i32* [[R]] +; CHECK-NEXT: ret void +; + %ap = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 0 + %bp = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 1 + %a = load i32, i32* %ap + %b = load i32, i32* %bp + %ab = add i32 %a, %b + store i32 %ab, i32* %r + ret void +} + +define void @f() { +; CHECK-LABEL: define {{[^@]+}}@f() +; CHECK-NEXT: [[R:%.*]] = alloca i32 +; CHECK-NEXT: [[PAIR:%.*]] = alloca { i32, i32 } +; CHECK-NEXT: [[PAIR_IDX:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[PAIR]], i64 0, i32 0 +; CHECK-NEXT: [[PAIR_IDX_VAL:%.*]] = load i32, i32* [[PAIR_IDX]] +; CHECK-NEXT: [[PAIR_IDX1:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[PAIR]], i64 0, i32 1 +; CHECK-NEXT: [[PAIR_IDX1_VAL:%.*]] = load i32, i32* [[PAIR_IDX1]] +; CHECK-NEXT: call void @add(i32 [[PAIR_IDX_VAL]], i32 [[PAIR_IDX1_VAL]], i32* noalias [[R]]) +; CHECK-NEXT: ret void +; + %r = alloca i32 + %pair = alloca {i32, i32} + + call void @add({i32, i32}* %pair, i32* sret %r) + ret void +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/tail.ll 
b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/tail.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/tail.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt %s -argpromotion -maxar=2147483647 -S -o - | FileCheck %s +; RUN: opt %s -passes=argpromotion -maxar=2147483647 -S -o - | FileCheck %s +; PR14710 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%pair = type { i32, i32 } + +declare i8* @foo(%pair*) + +define internal void @bar(%pair* byval %Data) { +; CHECK-LABEL: define {{[^@]+}}@bar +; CHECK-SAME: (i32 [[DATA_0:%.*]], i32 [[DATA_1:%.*]]) +; CHECK-NEXT: [[DATA:%.*]] = alloca [[PAIR:%.*]], align 8 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 0 +; CHECK-NEXT: store i32 [[DATA_0]], i32* [[DOT0]], align 4 +; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1 +; CHECK-NEXT: store i32 [[DATA_1]], i32* [[DOT1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @foo(%pair* [[DATA]]) +; CHECK-NEXT: ret void +; + tail call i8* @foo(%pair* %Data) + ret void +} + +define void @zed(%pair* byval %Data) { +; CHECK-LABEL: define {{[^@]+}}@zed +; CHECK-SAME: (%pair* byval [[DATA:%.*]]) +; CHECK-NEXT: [[DATA_0:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 0 +; CHECK-NEXT: [[DATA_0_VAL:%.*]] = load i32, i32* [[DATA_0]], align 4 +; CHECK-NEXT: [[DATA_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1 +; CHECK-NEXT: [[DATA_1_VAL:%.*]] = load i32, i32* [[DATA_1]], align 4 +; CHECK-NEXT: call void @bar(i32 [[DATA_0_VAL]], i32 [[DATA_1_VAL]]) +; CHECK-NEXT: ret void +; + call void @bar(%pair* byval %Data) + ret void +} diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/thiscall.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/thiscall.ll new 
file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/thiscall.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; In PR41658, argpromotion put an inalloca in a position that per the +; calling convention is passed in a register. This test verifies that +; we don't do that anymore. It also verifies that the combination of +; globalopt and argpromotion is able to optimize the call safely. +; +; RUN: opt -S -argpromotion -maxar=2147483647 %s | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt -S -globalopt -argpromotion -maxar=2147483647 %s | FileCheck %s --check-prefix=GLOBALOPT_ARGPROMOTION + +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-pc-windows-msvc19.11.0" + +%struct.a = type { i8 } + +define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@internalfun +; ARGPROMOTION-SAME: (%struct.a* [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca [[TMP0:%.*]]) +; ARGPROMOTION-NEXT: entry: +; ARGPROMOTION-NEXT: [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0 +; ARGPROMOTION-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4 +; ARGPROMOTION-NEXT: [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0 +; ARGPROMOTION-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* [[TMP1]], %struct.a* dereferenceable(1) [[A]]) +; ARGPROMOTION-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]]) +; ARGPROMOTION-NEXT: ret void +; +; GLOBALOPT_ARGPROMOTION-LABEL: define {{[^@]+}}@internalfun +; GLOBALOPT_ARGPROMOTION-SAME: (<{ [[STRUCT_A:%.*]] }>* [[TMP0:%.*]]) unnamed_addr +; GLOBALOPT_ARGPROMOTION-NEXT: entry: +; GLOBALOPT_ARGPROMOTION-NEXT: [[A:%.*]] = getelementptr inbounds <{ 
[[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0 +; GLOBALOPT_ARGPROMOTION-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4 +; GLOBALOPT_ARGPROMOTION-NEXT: [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0 +; GLOBALOPT_ARGPROMOTION-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* [[TMP1]], %struct.a* dereferenceable(1) [[A]]) +; GLOBALOPT_ARGPROMOTION-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]]) +; GLOBALOPT_ARGPROMOTION-NEXT: ret void +; +entry: + %a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0 + %argmem = alloca inalloca <{ %struct.a }>, align 4 + %1 = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %argmem, i32 0, i32 0 + %call = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* %1, %struct.a* dereferenceable(1) %a) + call void @ext(<{ %struct.a }>* inalloca %argmem) + ret void +} + +; This is here to ensure @internalfun is live. 
+define void @exportedfun(%struct.a* %a) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@exportedfun +; ARGPROMOTION-SAME: (%struct.a* [[A:%.*]]) +; ARGPROMOTION-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() +; ARGPROMOTION-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4 +; ARGPROMOTION-NEXT: call x86_thiscallcc void @internalfun(%struct.a* [[A]], <{ [[STRUCT_A]] }>* inalloca [[ARGMEM]]) +; ARGPROMOTION-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) +; ARGPROMOTION-NEXT: ret void +; +; GLOBALOPT_ARGPROMOTION-LABEL: define {{[^@]+}}@exportedfun +; GLOBALOPT_ARGPROMOTION-SAME: (%struct.a* [[A:%.*]]) local_unnamed_addr +; GLOBALOPT_ARGPROMOTION-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() +; GLOBALOPT_ARGPROMOTION-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4 +; GLOBALOPT_ARGPROMOTION-NEXT: call fastcc void @internalfun(<{ [[STRUCT_A]] }>* [[ARGMEM]]) +; GLOBALOPT_ARGPROMOTION-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) +; GLOBALOPT_ARGPROMOTION-NEXT: ret void +; + %inalloca.save = tail call i8* @llvm.stacksave() + %argmem = alloca inalloca <{ %struct.a }>, align 4 + call x86_thiscallcc void @internalfun(%struct.a* %a, <{ %struct.a }>* inalloca %argmem) + call void @llvm.stackrestore(i8* %inalloca.save) + ret void +} + +declare x86_thiscallcc %struct.a* @copy_ctor(%struct.a* returned, %struct.a* dereferenceable(1)) +declare void @ext(<{ %struct.a }>* inalloca) +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) diff --git a/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/variadic.ll b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/variadic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/magic-values-maxar-intmax/variadic.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s 
-argpromotion -maxar=2147483647 -S | FileCheck %s +; RUN: opt < %s -passes=argpromotion -maxar=2147483647 -S | FileCheck %s + +; Unused arguments from variadic functions cannot be eliminated as that changes +; their classification according to the SysV amd64 ABI. Clang and other frontends +; bake in the classification when they use things like byval, as in this test. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.tt0 = type { i64, i64 } +%struct.__va_list_tag = type { i32, i32, i8*, i8* } + +@t45 = internal global %struct.tt0 { i64 1335139741, i64 438042995 }, align 8 + +; Function Attrs: nounwind uwtable +define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 { +; CHECK-LABEL: define {{[^@]+}}@main +; CHECK-SAME: (i32 [[ARGC:%.*]], i8** nocapture readnone [[ARGV:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45) +; CHECK-NEXT: ret i32 0 +; +entry: + tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45) + ret i32 0 +} + +; Function Attrs: nounwind uwtable +define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...) { +; CHECK-LABEL: define {{[^@]+}}@callee_t0f +; CHECK-SAME: (i8* nocapture readnone [[TP13:%.*]], i8* nocapture readnone [[TP14:%.*]], i8* nocapture readnone [[TP15:%.*]], i8* nocapture readnone [[TP16:%.*]], i8* nocapture readnone [[TP17:%.*]], ...) +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + ret void +}