diff --git a/clang/test/CodeGen/2008-07-30-implicit-initialization.c b/clang/test/CodeGen/2008-07-30-implicit-initialization.c --- a/clang/test/CodeGen/2008-07-30-implicit-initialization.c +++ b/clang/test/CodeGen/2008-07-30-implicit-initialization.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple i386-unknown-unknown -O1 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -O2 -emit-llvm -o - %s | FileCheck %s // CHECK-LABEL: define i32 @f0() // CHECK: ret i32 0 // CHECK-LABEL: define i32 @f1() diff --git a/clang/test/CodeGen/arm-fp16-arguments.c b/clang/test/CodeGen/arm-fp16-arguments.c --- a/clang/test/CodeGen/arm-fp16-arguments.c +++ b/clang/test/CodeGen/arm-fp16-arguments.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT -// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD -// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=NATIVE +// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT +// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD +// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=NATIVE __fp16 g; diff --git a/clang/test/CodeGen/arm-vfp16-arguments2.cpp b/clang/test/CodeGen/arm-vfp16-arguments2.cpp --- a/clang/test/CodeGen/arm-vfp16-arguments2.cpp +++ b/clang/test/CodeGen/arm-vfp16-arguments2.cpp @@ -1,12 +1,12 @@ // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ -// RUN: -mfloat-abi soft -target-feature +neon -emit-llvm -o - -O1 %s \ +// RUN: -mfloat-abi soft -target-feature +neon -emit-llvm -o - -O2 %s \ // RUN: | FileCheck %s --check-prefix=CHECK-SOFT // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ -// RUN: -mfloat-abi hard -target-feature +neon -emit-llvm -o - -O1 %s \ +// RUN: -mfloat-abi hard -target-feature +neon -emit-llvm -o - -O2 %s \ // RUN: | FileCheck %s --check-prefix=CHECK-HARD // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ // RUN: -mfloat-abi hard -target-feature +neon -target-feature +fullfp16 \ -// RUN: -emit-llvm -o - -O1 %s \ +// RUN: -emit-llvm -o - -O2 %s \ // RUN: | FileCheck %s --check-prefix=CHECK-FULL typedef float float32_t; diff --git a/clang/test/CodeGen/atomic-ops-libcall.c b/clang/test/CodeGen/atomic-ops-libcall.c --- a/clang/test/CodeGen/atomic-ops-libcall.c +++ b/clang/test/CodeGen/atomic-ops-libcall.c @@ -10,109 +10,109 @@ int *test_c11_atomic_fetch_add_int_ptr(_Atomic(int *) *p) { // CHECK: test_c11_atomic_fetch_add_int_ptr - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5) return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst); } int *test_c11_atomic_fetch_sub_int_ptr(_Atomic(int *) *p) { // CHECK: test_c11_atomic_fetch_sub_int_ptr - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5) return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst); } int test_c11_atomic_fetch_add_int(_Atomic(int) *p) { // CHECK: test_c11_atomic_fetch_add_int - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 3, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 3, i32 5) return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst); } int test_c11_atomic_fetch_sub_int(_Atomic(int) *p) { // CHECK: test_c11_atomic_fetch_sub_int - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 5, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 5, i32 5) return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst); } int *fp2a(int **p) { // CHECK: @fp2a - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0) // Note, the GNU builtins do not multiply by sizeof(T)! return __atomic_fetch_sub(p, 4, memory_order_relaxed); } int test_atomic_fetch_add(int *p) { // CHECK: test_atomic_fetch_add - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_add(p, 55, memory_order_seq_cst); } int test_atomic_fetch_sub(int *p) { // CHECK: test_atomic_fetch_sub - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_sub(p, 55, memory_order_seq_cst); } int test_atomic_fetch_and(int *p) { // CHECK: test_atomic_fetch_and - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_and(p, 55, memory_order_seq_cst); } int test_atomic_fetch_or(int *p) { // CHECK: test_atomic_fetch_or - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_or(p, 55, memory_order_seq_cst); } int test_atomic_fetch_xor(int *p) { // CHECK: test_atomic_fetch_xor - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_xor(p, 55, memory_order_seq_cst); } int test_atomic_fetch_nand(int *p) { // CHECK: test_atomic_fetch_nand - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_nand(p, 55, memory_order_seq_cst); } int test_atomic_add_fetch(int *p) { // CHECK: test_atomic_add_fetch - // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = add i32 [[CALL]], 55 return __atomic_add_fetch(p, 55, memory_order_seq_cst); } int test_atomic_sub_fetch(int *p) { // CHECK: test_atomic_sub_fetch - // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = add i32 [[CALL]], -55 return __atomic_sub_fetch(p, 55, memory_order_seq_cst); } int test_atomic_and_fetch(int *p) { // CHECK: test_atomic_and_fetch - // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = and i32 [[CALL]], 55 return __atomic_and_fetch(p, 55, memory_order_seq_cst); } int test_atomic_or_fetch(int *p) { // CHECK: test_atomic_or_fetch - // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = or i32 [[CALL]], 55 return __atomic_or_fetch(p, 55, memory_order_seq_cst); } int test_atomic_xor_fetch(int *p) { // CHECK: test_atomic_xor_fetch - // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = xor i32 [[CALL]], 55 return __atomic_xor_fetch(p, 55, memory_order_seq_cst); } int test_atomic_nand_fetch(int *p) { // CHECK: test_atomic_nand_fetch - // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5) // FIXME: We should not be checking optimized IR. It changes independently of clang. // FIXME-CHECK: [[AND:%[^ ]*]] = and i32 [[CALL]], 55 // FIXME-CHECK: {{%[^ ]*}} = xor i32 [[AND]], -1 diff --git a/clang/test/CodeGenCXX/atomicinit.cpp b/clang/test/CodeGenCXX/atomicinit.cpp --- a/clang/test/CodeGenCXX/atomicinit.cpp +++ b/clang/test/CodeGenCXX/atomicinit.cpp @@ -31,7 +31,7 @@ // CHECK-LABEL: define void @_Z11atomic_initR1Ai void atomic_init(A& a, int i) { // CHECK-NOT: atomic - // CHECK: tail call void @_ZN1BC1Ei + // CHECK: call void @_ZN1BC1Ei __c11_atomic_init(&b, B(i)); // CHECK-NEXT: ret void } diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp --- a/clang/test/CodeGenCXX/auto-var-init.cpp +++ b/clang/test/CodeGenCXX/auto-var-init.cpp @@ -645,7 +645,7 @@ // ZERO-LABEL: @test_smallpartinit_uninit() // ZERO-O0: call void @llvm.memset{{.*}}, i8 0, // ZERO-O1-LEGACY: store i16 0, i16* %uninit, align 2 -// ZERO-O1-NEWPM: store i16 42, i16* %uninit, align 2 +// ZERO-O1-NEWPM: store i16 0, i16* %uninit, align 2 TEST_BRACES(smallpartinit, smallpartinit); // CHECK-LABEL: @test_smallpartinit_braces() @@ -718,7 +718,7 @@ // PATTERN-LABEL: @test_paddednullinit_uninit() // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_paddednullinit_uninit.uninit // PATTERN-O1-LEGACY: store i64 [[I64]], i64* %uninit, align 8 -// PATTERN-O1-NEWPM: store i64 2863311360, i64* %uninit, align 8 +// PATTERN-O1-NEWPM: store i64 [[I64]], i64* %uninit, align 8 // ZERO-LABEL: @test_paddednullinit_uninit() // ZERO-O0: call void @llvm.memset{{.*}}, i8 0, // ZERO-O1: store i64 0, i64* %uninit, align 8 @@ -1344,10 +1344,7 @@ // ZERO-LABEL: @test_virtualderived_uninit() // ZERO-O0: call void @llvm.memset{{.*}}, i8 0, // ZERO-O1-LEGACY: call void @llvm.memset{{.*}}, i8 0, -// ZERO-O1-NEWPM: [[FIELD1:%.*]] = getelementptr inbounds %struct.virtualderived, %struct.virtualderived* %uninit, i64 0, i32 1, i32 0, i32 0 -// ZERO-O1-NEWPM: [[FIELD0:%.*]] = getelementptr inbounds %struct.virtualderived, %struct.virtualderived* %uninit, i64 0, i32 0, i32 0 -// ZERO-O1-NEWPM: store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*], [5 x i8*] }, { [7 x i8*], [5 x i8*] }* @_ZTV14virtualderived, i64 0, inrange i32 0, i64 5) to i32 (...)**), i32 (...)*** [[FIELD0]], align 8 -// ZERO-O1-NEWPM: store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*], [5 x i8*] }, { [7 x i8*], [5 x i8*] }* @_ZTV14virtualderived, i64 0, inrange i32 1, i64 3) to i32 (...)**), i32 (...)*** [[FIELD1]], align 8 +// ZERO-O1-NEWPM: call void @llvm.memset{{.*}}, i8 0, TEST_BRACES(virtualderived, virtualderived); // CHECK-LABEL: @test_virtualderived_braces() diff --git a/clang/test/CodeGenCXX/discard-name-values.cpp b/clang/test/CodeGenCXX/discard-name-values.cpp --- a/clang/test/CodeGenCXX/discard-name-values.cpp +++ b/clang/test/CodeGenCXX/discard-name-values.cpp @@ -11,11 +11,11 @@ if (pred) { // DISCARDVALUE: 2: - // DISCARDVALUE-NEXT: tail call void @branch() + // DISCARDVALUE-NEXT: call void @branch() // DISCARDVALUE-NEXT: br label %3 // CHECK: if.then: - // CHECK-NEXT: tail call void @branch() + // CHECK-NEXT: call void @branch() // CHECK-NEXT: br label %if.end branch(); } diff --git a/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp b/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp --- a/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp @@ -13,7 +13,7 @@ T* test1(V* x) { return &dynamic_cast(*x); } // CHECK-LABEL: define dso_local %struct.T* @"?test1@@YAPAUT@@PAUV@@@Z"(%struct.V* %x) // CHECK: [[CAST:%.*]] = bitcast %struct.V* %x to i8* -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: ret %struct.T* [[RET]] @@ -25,7 +25,7 @@ // CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[VBOFFS]] -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[VBOFFS]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[VBOFFS]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: ret %struct.T* [[RET]] @@ -39,14 +39,14 @@ // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 [[DELTA]] -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: ret %struct.T* [[RET]] T* test4(V* x) { return dynamic_cast(x); } // CHECK-LABEL: define dso_local %struct.T* @"?test4@@YAPAUT@@PAUV@@@Z"(%struct.V* %x) // CHECK: [[CAST:%.*]] = bitcast %struct.V* %x to i8* -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: ret %struct.T* [[RET]] @@ -60,7 +60,7 @@ // CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 [[VBOFFS]] -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* nonnull [[ADJ]], i32 [[VBOFFS]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* nonnull [[ADJ]], i32 [[VBOFFS]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) // CHECK-NEXT: [[RES:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi %struct.T* @@ -78,7 +78,7 @@ // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[DELTA]] -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) // CHECK-NEXT: [[RES:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi %struct.T* @@ -87,7 +87,7 @@ void* test7(V* x) { return dynamic_cast(x); } // CHECK-LABEL: define dso_local i8* @"?test7@@YAPAXPAUV@@@Z"(%struct.V* %x) // CHECK: [[CAST:%.*]] = bitcast %struct.V* %x to i8* -// CHECK-NEXT: [[RET:%.*]] = tail call i8* @__RTCastToVoid(i8* [[CAST]]) +// CHECK-NEXT: [[RET:%.*]] = call i8* @__RTCastToVoid(i8* [[CAST]]) // CHECK-NEXT: ret i8* [[RET]] void* test8(A* x) { return dynamic_cast(x); } @@ -100,7 +100,7 @@ // CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 [[VBOFFS]] -// CHECK-NEXT: [[RES:%.*]] = tail call i8* @__RTCastToVoid(i8* nonnull [[ADJ]]) +// CHECK-NEXT: [[RES:%.*]] = call i8* @__RTCastToVoid(i8* nonnull [[ADJ]]) // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi i8* // CHECK-NEXT: ret i8* [[RET]] @@ -117,7 +117,7 @@ // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[DELTA]] -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTCastToVoid(i8* [[ADJ]]) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTCastToVoid(i8* [[ADJ]]) // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi i8* // CHECK-NEXT: ret i8* [[RET]] diff --git a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp --- a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp @@ -25,10 +25,10 @@ const std::type_info* test3_typeid() { return &typeid(*fn()); } // CHECK-LABEL: define dso_local %struct.type_info* @"?test3_typeid@@YAPBUtype_info@@XZ"() -// CHECK: [[CALL:%.*]] = tail call %struct.A* @"?fn@@YAPAUA@@XZ"() +// CHECK: [[CALL:%.*]] = call %struct.A* @"?fn@@YAPAUA@@XZ"() // CHECK-NEXT: [[CMP:%.*]] = icmp eq %struct.A* [[CALL]], null // CHECK-NEXT: br i1 [[CMP]] -// CHECK: tail call i8* @__RTtypeid(i8* null) +// CHECK: call i8* @__RTtypeid(i8* null) // CHECK-NEXT: unreachable // CHECK: [[THIS:%.*]] = bitcast %struct.A* [[CALL]] to i8* // CHECK-NEXT: [[VBTBLP:%.*]] = getelementptr %struct.A, %struct.A* [[CALL]], i32 0, i32 0 @@ -36,7 +36,7 @@ // CHECK-NEXT: [[VBSLOT:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBASE_OFFS:%.*]] = load i32, i32* [[VBSLOT]], align 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[THIS]], i32 [[VBASE_OFFS]] -// CHECK-NEXT: [[RT:%.*]] = tail call i8* @__RTtypeid(i8* nonnull [[ADJ]]) +// CHECK-NEXT: [[RT:%.*]] = call i8* @__RTtypeid(i8* nonnull [[ADJ]]) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[RT]] to %struct.type_info* // CHECK-NEXT: ret %struct.type_info* [[RET]] @@ -46,7 +46,7 @@ const std::type_info* test5_typeid() { return &typeid(v); } // CHECK: define dso_local %struct.type_info* @"?test5_typeid@@YAPBUtype_info@@XZ"() -// CHECK: [[RT:%.*]] = tail call i8* @__RTtypeid(i8* bitcast (%struct.V* @"?v@@3UV@@A" to i8*)) +// CHECK: [[RT:%.*]] = call i8* @__RTtypeid(i8* bitcast (%struct.V* @"?v@@3UV@@A" to i8*)) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[RT]] to %struct.type_info* // CHECK-NEXT: ret %struct.type_info* [[RET]] diff --git a/clang/test/CodeGenCXX/nrvo.cpp b/clang/test/CodeGenCXX/nrvo.cpp --- a/clang/test/CodeGenCXX/nrvo.cpp +++ b/clang/test/CodeGenCXX/nrvo.cpp @@ -33,13 +33,13 @@ // CHECK-LABEL: define void @_Z5test1b( // CHECK-EH-LABEL: define void @_Z5test1b( X test1(bool B) { - // CHECK: tail call {{.*}} @_ZN1XC1Ev + // CHECK: call {{.*}} @_ZN1XC1Ev // CHECK-NEXT: ret void X x; if (B) return (x); return x; - // CHECK-EH: tail call {{.*}} @_ZN1XC1Ev + // CHECK-EH: call {{.*}} @_ZN1XC1Ev // CHECK-EH-NEXT: ret void } @@ -130,7 +130,7 @@ // CHECK-LABEL: define void @_Z5test3b X test3(bool B) { - // CHECK: tail call {{.*}} @_ZN1XC1Ev + // CHECK: call {{.*}} @_ZN1XC1Ev // CHECK-NOT: call {{.*}} @_ZN1XC1ERKS_ // CHECK: call {{.*}} @_ZN1XC1Ev // CHECK: call {{.*}} @_ZN1XC1ERKS_ @@ -148,14 +148,14 @@ // CHECK-LABEL: define void @_Z5test4b X test4(bool B) { { - // CHECK: tail call {{.*}} @_ZN1XC1Ev + // CHECK: call {{.*}} @_ZN1XC1Ev X x; // CHECK: br i1 if (B) return x; } - // CHECK: tail call {{.*}} @_ZN1XD1Ev - // CHECK: tail call void @exit(i32 1) + // CHECK: call {{.*}} @_ZN1XD1Ev + // CHECK: call void @exit(i32 1) exit(1); } @@ -191,7 +191,7 @@ // CHECK-LABEL: define void @_Z5test7b X test7(bool b) { - // CHECK: tail call {{.*}} @_ZN1XC1Ev + // CHECK: call {{.*}} @_ZN1XC1Ev // CHECK-NEXT: ret if (b) { X x; @@ -202,7 +202,7 @@ // CHECK-LABEL: define void @_Z5test8b X test8(bool b) { - // CHECK: tail call {{.*}} @_ZN1XC1Ev + // CHECK: call {{.*}} @_ZN1XC1Ev // CHECK-NEXT: ret if (b) { X x; @@ -218,6 +218,6 @@ } // CHECK-LABEL: define linkonce_odr void @_ZN1YIiE1fEv -// CHECK: tail call {{.*}} @_ZN1YIiEC1Ev +// CHECK: call {{.*}} @_ZN1YIiEC1Ev // CHECK-EH-03: attributes [[NR_NUW]] = { noreturn nounwind } diff --git a/clang/test/CodeGenCXX/stack-reuse.cpp b/clang/test/CodeGenCXX/stack-reuse.cpp --- a/clang/test/CodeGenCXX/stack-reuse.cpp +++ b/clang/test/CodeGenCXX/stack-reuse.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple armv7-unknown-linux-gnueabihf %s -o - -emit-llvm -O1 | FileCheck %s +// RUN: %clang_cc1 -triple armv7-unknown-linux-gnueabihf %s -o - -emit-llvm -O2 | FileCheck %s // Stack should be reused when possible, no need to allocate two separate slots // if they have disjoint lifetime. diff --git a/clang/test/CodeGenCXX/wasm-args-returns.cpp b/clang/test/CodeGenCXX/wasm-args-returns.cpp --- a/clang/test/CodeGenCXX/wasm-args-returns.cpp +++ b/clang/test/CodeGenCXX/wasm-args-returns.cpp @@ -19,8 +19,8 @@ // CHECK: define double @_Z7forward9one_field(double returned %{{.*}}) // // CHECK: define void @_Z14test_one_fieldv() -// CHECK: %[[call:.*]] = tail call double @_Z13def_one_fieldv() -// CHECK: tail call void @_Z3use9one_field(double %[[call]]) +// CHECK: %[[call:.*]] = call double @_Z13def_one_fieldv() +// CHECK: call void @_Z3use9one_field(double %[[call]]) // CHECK: ret void // // CHECK: declare void @_Z3use9one_field(double) @@ -82,8 +82,8 @@ // CHECK: define void @_Z7forward5empty() // // CHECK: define void @_Z10test_emptyv() -// CHECK: tail call void @_Z9def_emptyv() -// CHECK: tail call void @_Z3use5empty() +// CHECK: call void @_Z9def_emptyv() +// CHECK: call void @_Z3use5empty() // CHECK: ret void // // CHECK: declare void @_Z3use5empty() @@ -96,8 +96,8 @@ // CHECK: define i32 @_Z7forward12one_bitfield(i32 returned %{{.*}}) // // CHECK: define void @_Z17test_one_bitfieldv() -// CHECK: %[[call:.*]] = tail call i32 @_Z16def_one_bitfieldv() -// CHECK: tail call void @_Z3use12one_bitfield(i32 %[[call]]) +// CHECK: %[[call:.*]] = call i32 @_Z16def_one_bitfieldv() +// CHECK: call void @_Z3use12one_bitfield(i32 %[[call]]) // CHECK: ret void // // CHECK: declare void @_Z3use12one_bitfield(i32) diff --git a/clang/test/CodeGenObjCXX/arc-blocks.mm b/clang/test/CodeGenObjCXX/arc-blocks.mm --- a/clang/test/CodeGenObjCXX/arc-blocks.mm +++ b/clang/test/CodeGenObjCXX/arc-blocks.mm @@ -122,7 +122,7 @@ // CHECK: call void @__clang_call_terminate( // CHECK-O1-LABEL: define linkonce_odr hidden void @__copy_helper_block_ea8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( -// CHECK-O1: tail call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release +// CHECK-O1: call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release // CHECK-NOEXCP: define linkonce_odr hidden void @__copy_helper_block_8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( // CHECK: define linkonce_odr hidden void @__destroy_helper_block_ea8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( @@ -170,8 +170,8 @@ // CHECK: call void @__clang_call_terminate( // CHECK-O1-LABEL: define linkonce_odr hidden void @__destroy_helper_block_ea8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( -// CHECK-O1: tail call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release -// CHECK-O1: tail call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release +// CHECK-O1: call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release +// CHECK-O1: call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release // CHECK-NOEXCP: define linkonce_odr hidden void @__destroy_helper_block_8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( namespace { diff --git a/clang/test/CodeGenObjCXX/nrvo.mm b/clang/test/CodeGenObjCXX/nrvo.mm --- a/clang/test/CodeGenObjCXX/nrvo.mm +++ b/clang/test/CodeGenObjCXX/nrvo.mm @@ -14,7 +14,7 @@ // CHECK: define internal void @"\01-[NRVO getNRVO]" - (X)getNRVO { X x; - // CHECK: tail call void @_ZN1XC1Ev + // CHECK: call void @_ZN1XC1Ev // CHECK-NEXT: ret void return x; } @@ -24,7 +24,7 @@ return ^{ // CHECK-LABEL: define internal void @___Z10blocksNRVOv_block_invoke X x; - // CHECK: tail call void @_ZN1XC1Ev + // CHECK: call void @_ZN1XC1Ev // CHECK-NEXT: ret void return x; }() ; diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c --- a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c @@ -1,16 +1,16 @@ -// Test CF+LF are properly handled along with quoted, multi-line #error -// RUN: %clang_cc1 -DOTHER -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s - -#ifndef TEST -#error "message \ - more message \ - even more" -#endif - -#ifdef OTHER -#include -#endif - -// CHECK: #ifdef OTHER -// CHECK-NEXT: #include -// CHECK-NEXT: #endif +// Test CF+LF are properly handled along with quoted, multi-line #error +// RUN: %clang_cc1 -DOTHER -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s + +#ifndef TEST +#error "message \ + more message \ + even more" +#endif + +#ifdef OTHER +#include +#endif + +// CHECK: #ifdef OTHER +// CHECK-NEXT: #include +// CHECK-NEXT: #endif diff --git a/clang/test/PCH/no-escaping-block-tail-calls.cpp b/clang/test/PCH/no-escaping-block-tail-calls.cpp --- a/clang/test/PCH/no-escaping-block-tail-calls.cpp +++ b/clang/test/PCH/no-escaping-block-tail-calls.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -x c++-header -triple x86_64-apple-darwin11 -emit-pch -O1 -fblocks -fno-escaping-block-tail-calls -o %t %S/no-escaping-block-tail-calls.h -// RUN: %clang_cc1 -triple x86_64-apple-darwin11 -include-pch %t -emit-llvm -O1 -fblocks -fno-escaping-block-tail-calls -o - %s | FileCheck %s +// RUN: %clang_cc1 -x c++-header -triple x86_64-apple-darwin11 -emit-pch -O2 -fblocks -fno-escaping-block-tail-calls -o %t %S/no-escaping-block-tail-calls.h +// RUN: %clang_cc1 -triple x86_64-apple-darwin11 -include-pch %t -emit-llvm -O2 -fblocks -fno-escaping-block-tail-calls -o - %s | FileCheck %s // Check that -fno-escaping-block-tail-calls doesn't disable tail-call // optimization if the block is non-escaping. diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -151,10 +151,6 @@ /// Optimize quickly without destroying debuggability. /// - /// FIXME: The current and historical behavior of this level does *not* - /// agree with this goal, but we would like to move toward this goal in the - /// future. - /// /// This level is tuned to produce a result from the optimizer as quickly /// as possible and to avoid destroying debuggability. This tends to result /// in a very good development mode where the compiled code will be @@ -164,9 +160,9 @@ /// debugging of the resulting binary. /// /// As an example, complex loop transformations such as versioning, - /// vectorization, or fusion might not make sense here due to the degree to - /// which the executed code would differ from the source code, and the - /// potential compile time cost. + /// vectorization, or fusion don't make sense here due to the degree to + /// which the executed code differs from the source code, and the compile time + /// cost. O1, /// Optimize for fast execution as much as possible without triggering diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -400,21 +400,25 @@ FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); // Hoisting of scalars and load expressions. - if (EnableGVNHoist) - FPM.addPass(GVNHoistPass()); - - // Global value numbering based sinking. - if (EnableGVNSink) { - FPM.addPass(GVNSinkPass()); - FPM.addPass(SimplifyCFGPass()); + if (Level > O1) { + if (EnableGVNHoist) + FPM.addPass(GVNHoistPass()); + + // Global value numbering based sinking. + if (EnableGVNSink) { + FPM.addPass(GVNSinkPass()); + FPM.addPass(SimplifyCFGPass()); + } } // Speculative execution if the target has divergent branches; otherwise nop. - FPM.addPass(SpeculativeExecutionPass()); + if (Level > O1) { + FPM.addPass(SpeculativeExecutionPass()); - // Optimize based on known information about branches, and cleanup afterward. - FPM.addPass(JumpThreadingPass()); - FPM.addPass(CorrelatedValuePropagationPass()); + // Optimize based on known information about branches, and cleanup afterward. + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + } FPM.addPass(SimplifyCFGPass()); if (Level == O3) FPM.addPass(AggressiveInstCombinePass()); @@ -428,10 +432,12 @@ // For PGO use pipeline, try to optimize memory intrinsics such as memcpy // using the size value profile. Don't perform this when optimizing for size. if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && - !isOptimizingForSize(Level)) + !isOptimizingForSize(Level) && Level > O1) FPM.addPass(PGOMemOPSizeOpt()); - FPM.addPass(TailCallElimPass()); + // TODO: Investigate the cost/benefit of tail call elimination on debugging. + if (Level > O1) + FPM.addPass(TailCallElimPass()); FPM.addPass(SimplifyCFGPass()); // Form canonically associated expression trees, and simplify the trees using @@ -458,6 +464,7 @@ // Rotate Loop - disable header duplication at -Oz LPM1.addPass(LoopRotatePass(Level != Oz)); + // TODO: Investigate promotion cap for O1. LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); LPM1.addPass(SimpleLoopUnswitchPass()); LPM2.addPass(IndVarSimplifyPass()); @@ -525,18 +532,21 @@ // Re-consider control flow based optimizations after redundancy elimination, // redo DCE, etc. - FPM.addPass(JumpThreadingPass()); - FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(DSEPass()); - FPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), - EnableMSSALoopDependency, DebugLogging)); + if (Level > O1) { + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(DSEPass()); + FPM.addPass(createFunctionToLoopPassAdaptor( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + EnableMSSALoopDependency, DebugLogging)); + } for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); // Finally, do an expensive DCE pass to catch all the dead code exposed by // the simplifications and basic cleanup after all the simplifications. + // TODO: Investigate if this is too expensive. FPM.addPass(ADCEPass()); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -320,19 +320,26 @@ legacy::PassManagerBase &MPM) { // Start of function pass. // Break up aggregate allocas, using SSAUpdater. + assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!"); MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies - if (EnableGVNHoist) - MPM.add(createGVNHoistPass()); - if (EnableGVNSink) { - MPM.add(createGVNSinkPass()); - MPM.add(createCFGSimplificationPass()); + + if (OptLevel > 1) { + if (EnableGVNHoist) + MPM.add(createGVNHoistPass()); + if (EnableGVNSink) { + MPM.add(createGVNSinkPass()); + MPM.add(createCFGSimplificationPass()); + } } - // Speculative execution if the target has divergent branches; otherwise nop. - MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); - MPM.add(createJumpThreadingPass()); // Thread jumps. - MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + if (OptLevel > 1) { + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + } MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Combine silly seq's if (OptLevel > 2) @@ -346,8 +353,10 @@ if (SizeLevel == 0) MPM.add(createPGOMemOPSizeOptLegacyPass()); - MPM.add(createTailCallEliminationPass()); // Eliminate tail calls - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + // TODO: Investigate the cost/benefit of tail call elimination on debugging. + if (OptLevel > 1) + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions // Begin the loop pass pipeline. @@ -360,6 +369,7 @@ } // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + // TODO: Investigate promotion cap for O1. MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); if (EnableSimpleLoopUnswitch) MPM.add(createSimpleLoopUnswitchLegacyPass()); @@ -402,16 +412,19 @@ // opened up by them. addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createJumpThreadingPass()); // Thread jumps - MPM.add(createCorrelatedValuePropagationPass()); - MPM.add(createDeadStoreEliminationPass()); // Delete dead stores - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + if (OptLevel > 1) { + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + } addExtensionsToPM(EP_ScalarOptimizerLate, MPM); if (RerollLoops) MPM.add(createLoopRerollPass()); + // TODO: Investigate if this is too expensive at O1. MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Clean up after everything. @@ -899,7 +912,8 @@ // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. - PM.add(createTailCallEliminationPass()); + if (OptLevel > 1) + PM.add(createTailCallEliminationPass()); // Infer attributes on declarations, call sites, arguments, etc. PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -3,17 +3,17 @@ ; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos -; GCN-POSTLINK: tail call fast float @_Z3sinf( -; GCN-POSTLINK: tail call fast float @_Z3cosf( +; GCN-POSTLINK: call fast float @_Z3sinf( +; GCN-POSTLINK: call fast float @_Z3cosf( ; GCN-PRELINK: call fast float @_Z6sincosfPf( -; GCN-NATIVE: tail call fast float @_Z10native_sinf( -; GCN-NATIVE: tail call fast float @_Z10native_cosf( +; GCN-NATIVE: call fast float @_Z10native_sinf( +; GCN-NATIVE: call fast float @_Z10native_cosf( define amdgpu_kernel void @test_sincos(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3sinf(float %tmp) + %call = call fast float @_Z3sinf(float %tmp) store float %call, float addrspace(1)* %a, align 4 - %call2 = tail call fast float @_Z3cosf(float %tmp) + %call2 = call fast float @_Z3cosf(float %tmp) %arrayidx3 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 store float %call2, float addrspace(1)* %arrayidx3, align 4 ret void @@ -24,17 +24,17 @@ declare float @_Z3cosf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2 -; GCN-POSTLINK: tail call fast <2 x float> @_Z3sinDv2_f( -; GCN-POSTLINK: tail call fast <2 x float> @_Z3cosDv2_f( +; GCN-POSTLINK: call fast <2 x float> @_Z3sinDv2_f( +; GCN-POSTLINK: call fast <2 x float> @_Z3cosDv2_f( ; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_( -; GCN-NATIVE: tail call fast <2 x float> @_Z10native_sinDv2_f( -; GCN-NATIVE: tail call fast <2 x float> @_Z10native_cosDv2_f( +; GCN-NATIVE: call fast <2 x float> @_Z10native_sinDv2_f( +; GCN-NATIVE: call fast <2 x float> @_Z10native_cosDv2_f( define amdgpu_kernel void @test_sincos_v2(<2 x float> addrspace(1)* nocapture %a) { entry: %tmp = load <2 x float>, <2 x float> addrspace(1)* %a, align 8 - %call = tail call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp) + %call = call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp) store <2 x float> %call, <2 x float> addrspace(1)* %a, align 8 - %call2 = tail call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp) + %call2 = call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp) %arrayidx3 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i64 1 store <2 x float> %call2, <2 x float> addrspace(1)* %arrayidx3, align 8 ret void @@ -45,20 +45,20 @@ declare <2 x float> @_Z3cosDv2_f(<2 x float>) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3 -; GCN-POSTLINK: tail call fast <3 x float> @_Z3sinDv3_f( -; GCN-POSTLINK: tail call fast <3 x float> @_Z3cosDv3_f( +; GCN-POSTLINK: call fast <3 x float> @_Z3sinDv3_f( +; GCN-POSTLINK: call fast <3 x float> @_Z3cosDv3_f( ; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_( -; GCN-NATIVE: tail call fast <3 x float> @_Z10native_sinDv3_f( -; GCN-NATIVE: tail call fast <3 x float> @_Z10native_cosDv3_f( +; GCN-NATIVE: call fast <3 x float> @_Z10native_sinDv3_f( +; GCN-NATIVE: call fast <3 x float> @_Z10native_cosDv3_f( define amdgpu_kernel void @test_sincos_v3(<3 x float> addrspace(1)* nocapture %a) { entry: %castToVec4 = bitcast <3 x float> addrspace(1)* %a to <4 x float> addrspace(1)* %loadVec4 = load <4 x float>, <4 x float> addrspace(1)* %castToVec4, align 16 %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> - %call = tail call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4) + %call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4) %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> store <4 x float> %extractVec6, <4 x float> addrspace(1)* %castToVec4, align 16 - %call11 = tail call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4) + %call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4) %arrayidx12 = getelementptr inbounds <3 x float>, <3 x float> addrspace(1)* %a, i64 1 %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> %storetmp14 = bitcast <3 x float> addrspace(1)* %arrayidx12 to <4 x float> addrspace(1)* @@ -71,17 +71,17 @@ declare <3 x float> @_Z3cosDv3_f(<3 x float>) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4 -; GCN-POSTLINK: tail call fast <4 x float> @_Z3sinDv4_f( -; GCN-POSTLINK: tail call fast <4 x float> @_Z3cosDv4_f( +; GCN-POSTLINK: call fast <4 x float> @_Z3sinDv4_f( +; GCN-POSTLINK: call fast <4 x float> @_Z3cosDv4_f( ; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_( -; GCN-NATIVE: tail call fast <4 x float> @_Z10native_sinDv4_f( -; GCN-NATIVE: tail call fast <4 x float> @_Z10native_cosDv4_f( +; GCN-NATIVE: call fast <4 x float> @_Z10native_sinDv4_f( +; GCN-NATIVE: call fast <4 x float> @_Z10native_cosDv4_f( define amdgpu_kernel void @test_sincos_v4(<4 x float> addrspace(1)* nocapture %a) { entry: %tmp = load <4 x float>, <4 x float> addrspace(1)* %a, align 16 - %call = tail call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp) + %call = call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp) store <4 x float> %call, <4 x float> addrspace(1)* %a, align 16 - %call2 = tail call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp) + %call2 = call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp) %arrayidx3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a, i64 1 store <4 x float> %call2, <4 x float> addrspace(1)* %arrayidx3, align 16 ret void @@ -92,17 +92,17 @@ declare <4 x float> @_Z3cosDv4_f(<4 x float>) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8 -; GCN-POSTLINK: tail call fast <8 x float> @_Z3sinDv8_f( -; GCN-POSTLINK: tail call fast <8 x float> @_Z3cosDv8_f( +; GCN-POSTLINK: call fast <8 x float> @_Z3sinDv8_f( +; GCN-POSTLINK: call fast <8 x float> @_Z3cosDv8_f( ; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_( -; GCN-NATIVE: tail call fast <8 x float> @_Z10native_sinDv8_f( -; GCN-NATIVE: tail call fast <8 x float> @_Z10native_cosDv8_f( +; GCN-NATIVE: call fast <8 x float> @_Z10native_sinDv8_f( +; GCN-NATIVE: call fast <8 x float> @_Z10native_cosDv8_f( define amdgpu_kernel void @test_sincos_v8(<8 x float> addrspace(1)* nocapture %a) { entry: %tmp = load <8 x float>, <8 x float> addrspace(1)* %a, align 32 - %call = tail call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp) + %call = call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp) store <8 x float> %call, <8 x float> addrspace(1)* %a, align 32 - %call2 = tail call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp) + %call2 = call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp) %arrayidx3 = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %a, i64 1 store <8 x float> %call2, <8 x float> addrspace(1)* %arrayidx3, align 32 ret void @@ -113,17 +113,17 @@ declare <8 x float> @_Z3cosDv8_f(<8 x float>) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16 -; GCN-POSTLINK: tail call fast <16 x float> @_Z3sinDv16_f( -; GCN-POSTLINK: tail call fast <16 x float> @_Z3cosDv16_f( +; GCN-POSTLINK: call fast <16 x float> @_Z3sinDv16_f( +; GCN-POSTLINK: call fast <16 x float> @_Z3cosDv16_f( ; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_( -; GCN-NATIVE: tail call fast <16 x float> @_Z10native_sinDv16_f( -; GCN-NATIVE: tail call fast <16 x float> @_Z10native_cosDv16_f( +; GCN-NATIVE: call fast <16 x float> @_Z10native_sinDv16_f( +; GCN-NATIVE: call fast <16 x float> @_Z10native_cosDv16_f( define amdgpu_kernel void @test_sincos_v16(<16 x float> addrspace(1)* nocapture %a) { entry: %tmp = load <16 x float>, <16 x float> addrspace(1)* %a, align 64 - %call = tail call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp) + %call = call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp) store <16 x float> %call, <16 x float> addrspace(1)* %a, align 64 - %call2 = tail call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp) + %call2 = call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp) %arrayidx3 = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %a, i64 1 store <16 x float> %call2, <16 x float> addrspace(1)* %arrayidx3, align 64 ret void @@ -137,7 +137,7 @@ ; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a define amdgpu_kernel void @test_native_recip(float addrspace(1)* nocapture %a) { entry: - %call = tail call fast float @_Z12native_recipf(float 3.000000e+00) + %call = call fast float @_Z12native_recipf(float 3.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -148,7 +148,7 @@ ; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a define amdgpu_kernel void @test_half_recip(float addrspace(1)* nocapture %a) { entry: - %call = tail call fast float @_Z10half_recipf(float 3.000000e+00) + %call = call fast float @_Z10half_recipf(float 3.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -160,7 +160,7 @@ define amdgpu_kernel void @test_native_divide(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00) + %call = call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -172,7 +172,7 @@ define amdgpu_kernel void @test_half_divide(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00) + %call = call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -184,7 +184,7 @@ define amdgpu_kernel void @test_pow_0f(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -196,7 +196,7 @@ define amdgpu_kernel void @test_pow_0i(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -208,7 +208,7 @@ entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -220,7 +220,7 @@ entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -231,7 +231,7 @@ define amdgpu_kernel void @test_pow_2f(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -242,7 +242,7 @@ define amdgpu_kernel void @test_pow_2i(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -254,7 +254,7 @@ entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -266,31 +266,31 @@ entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half -; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 5.000000e-01) -; GCN-PRELINK: %__pow2sqrt = tail call fast float @_Z4sqrtf(float %tmp) +; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 5.000000e-01) +; GCN-PRELINK: %__pow2sqrt = call fast float @_Z4sqrtf(float %tmp) define amdgpu_kernel void @test_pow_half(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 5.000000e-01) + %call = call fast float @_Z3powff(float %tmp, float 5.000000e-01) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf -; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float -5.000000e-01) -; GCN-PRELINK: %__pow2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp) +; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float -5.000000e-01) +; GCN-PRELINK: %__pow2rsqrt = call fast float @_Z5rsqrtf(float %tmp) define amdgpu_kernel void @test_pow_mhalf(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float -5.000000e-01) + %call = call fast float @_Z3powff(float %tmp, float -5.000000e-01) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -305,7 +305,7 @@ entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 1.100000e+01) + %call = call fast float @_Z3powff(float %tmp, float 1.100000e+01) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -320,7 +320,7 @@ entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z4powrff(float %tmp, float 1.100000e+01) + %call = call fast float @_Z4powrff(float %tmp, float 1.100000e+01) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -337,7 +337,7 @@ entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z4pownfi(float %tmp, i32 11) + %call = call fast float @_Z4pownfi(float %tmp, i32 11) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -345,11 +345,11 @@ declare float @_Z4pownfi(float, i32) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow -; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 1.013000e+03) -; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp) -; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs) +; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 1.013000e+03) +; GCN-PRELINK: %__fabs = call fast float @_Z4fabsf(float %tmp) +; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %__fabs) ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03 -; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) +; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx) ; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 ; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648 ; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32 @@ -359,39 +359,39 @@ define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 1.013000e+03) + %call = call fast float @_Z3powff(float %tmp, float 1.013000e+03) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr -; GCN-POSTLINK: tail call fast float @_Z4powrff(float %tmp, float %tmp1) -; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %tmp) +; GCN-POSTLINK: call fast float @_Z4powrff(float %tmp, float %tmp1) +; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %tmp) ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1 -; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) +; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx) ; GCN-PRELINK: store float %__exp2, float addrspace(1)* %a, align 4 -; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp) +; GCN-NATIVE: %__log2 = call fast float @_Z11native_log2f(float %tmp) ; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1 -; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx) +; GCN-NATIVE: %__exp2 = call fast float @_Z11native_exp2f(float %__ylogx) ; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4 define amdgpu_kernel void @test_powr(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 - %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1) + %call = call fast float @_Z4powrff(float %tmp, float %tmp1) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown -; GCN-POSTLINK: tail call fast float @_Z4pownfi(float %tmp, i32 %conv) +; GCN-POSTLINK: call fast float @_Z4pownfi(float %tmp, i32 %conv) ; GCN-PRELINK: %conv = fptosi float %tmp1 to i32 -; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp) -; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs) +; GCN-PRELINK: %__fabs = call fast float @_Z4fabsf(float %tmp) +; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %__fabs) ; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F -; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) +; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx) ; GCN-PRELINK: %__yeven = shl i32 %conv, 31 ; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 ; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]] @@ -405,7 +405,7 @@ %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 %conv = fptosi float %tmp1 to i32 - %call = tail call fast float @_Z4pownfi(float %tmp, i32 %conv) + %call = call fast float @_Z4pownfi(float %tmp, i32 %conv) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -417,7 +417,7 @@ entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z5rootnfi(float %tmp, i32 1) + %call = call fast float @_Z5rootnfi(float %tmp, i32 1) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -425,23 +425,23 @@ declare float @_Z5rootnfi(float, i32) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2 -; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 2) -; GCN-PRELINK: %__rootn2sqrt = tail call fast float @_Z4sqrtf(float %tmp) +; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 2) +; GCN-PRELINK: %__rootn2sqrt = call fast float @_Z4sqrtf(float %tmp) define amdgpu_kernel void @test_rootn_2(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z5rootnfi(float %tmp, i32 2) + %call = call fast float @_Z5rootnfi(float %tmp, i32 2) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3 -; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 3) -; GCN-PRELINK: %__rootn2cbrt = tail call fast float @_Z4cbrtf(float %tmp) +; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 3) +; GCN-PRELINK: %__rootn2cbrt = call fast float @_Z4cbrtf(float %tmp) define amdgpu_kernel void @test_rootn_3(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z5rootnfi(float %tmp, i32 3) + %call = call fast float @_Z5rootnfi(float %tmp, i32 3) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -451,18 +451,18 @@ define amdgpu_kernel void @test_rootn_m1(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -1) + %call = call fast float @_Z5rootnfi(float %tmp, i32 -1) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2 -; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 -2) -; GCN-PRELINK: %__rootn2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp) +; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 -2) +; GCN-PRELINK: %__rootn2rsqrt = call fast float @_Z5rsqrtf(float %tmp) define amdgpu_kernel void @test_rootn_m2(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -2) + %call = call fast float @_Z5rootnfi(float %tmp, i32 -2) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -472,7 +472,7 @@ define amdgpu_kernel void @test_fma_0x(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y) + %call = call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -484,7 +484,7 @@ define amdgpu_kernel void @test_fma_x0(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y) + %call = call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -494,7 +494,7 @@ define amdgpu_kernel void @test_mad_0x(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y) + %call = call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -506,7 +506,7 @@ define amdgpu_kernel void @test_mad_x0(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y) + %call = call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -516,7 +516,7 @@ define amdgpu_kernel void @test_fma_x1y(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y) + %call = call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -526,7 +526,7 @@ define amdgpu_kernel void @test_fma_1xy(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y) + %call = call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -538,17 +538,17 @@ %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 %tmp1 = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00) + %call = call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp -; GCN-NATIVE: tail call fast float @_Z10native_expf(float %tmp) +; GCN-NATIVE: call fast float @_Z10native_expf(float %tmp) define amdgpu_kernel void @test_use_native_exp(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3expf(float %tmp) + %call = call fast float @_Z3expf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -556,11 +556,11 @@ declare float @_Z3expf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2 -; GCN-NATIVE: tail call fast float @_Z11native_exp2f(float %tmp) +; GCN-NATIVE: call fast float @_Z11native_exp2f(float %tmp) define amdgpu_kernel void @test_use_native_exp2(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z4exp2f(float %tmp) + %call = call fast float @_Z4exp2f(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -568,11 +568,11 @@ declare float @_Z4exp2f(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10 -; GCN-NATIVE: tail call fast float @_Z12native_exp10f(float %tmp) +; GCN-NATIVE: call fast float @_Z12native_exp10f(float %tmp) define amdgpu_kernel void @test_use_native_exp10(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z5exp10f(float %tmp) + %call = call fast float @_Z5exp10f(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -580,11 +580,11 @@ declare float @_Z5exp10f(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log -; GCN-NATIVE: tail call fast float @_Z10native_logf(float %tmp) +; GCN-NATIVE: call fast float @_Z10native_logf(float %tmp) define amdgpu_kernel void @test_use_native_log(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3logf(float %tmp) + %call = call fast float @_Z3logf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -592,11 +592,11 @@ declare float @_Z3logf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2 -; GCN-NATIVE: tail call fast float @_Z11native_log2f(float %tmp) +; GCN-NATIVE: call fast float @_Z11native_log2f(float %tmp) define amdgpu_kernel void @test_use_native_log2(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z4log2f(float %tmp) + %call = call fast float @_Z4log2f(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -604,11 +604,11 @@ declare float @_Z4log2f(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10 -; GCN-NATIVE: tail call fast float @_Z12native_log10f(float %tmp) +; GCN-NATIVE: call fast float @_Z12native_log10f(float %tmp) define amdgpu_kernel void @test_use_native_log10(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z5log10f(float %tmp) + %call = call fast float @_Z5log10f(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -617,36 +617,36 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr ; GCN-NATIVE: %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 -; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp) +; GCN-NATIVE: %__log2 = call fast float @_Z11native_log2f(float %tmp) ; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1 -; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx) +; GCN-NATIVE: %__exp2 = call fast float @_Z11native_exp2f(float %__ylogx) ; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4 define amdgpu_kernel void @test_use_native_powr(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 - %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1) + %call = call fast float @_Z4powrff(float %tmp, float %tmp1) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt -; GCN-NATIVE: tail call fast float @_Z11native_sqrtf(float %tmp) +; GCN-NATIVE: call fast float @_Z11native_sqrtf(float %tmp) define amdgpu_kernel void @test_use_native_sqrt(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z4sqrtf(float %tmp) + %call = call fast float @_Z4sqrtf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64 -; GCN: tail call fast double @_Z4sqrtd(double %tmp) +; GCN: call fast double @_Z4sqrtd(double %tmp) define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) { entry: %tmp = load double, double addrspace(1)* %a, align 8 - %call = tail call fast double @_Z4sqrtd(double %tmp) + %call = call fast double @_Z4sqrtd(double %tmp) store double %call, double addrspace(1)* %a, align 8 ret void } @@ -655,11 +655,11 @@ declare double @_Z4sqrtd(double) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt -; GCN-NATIVE: tail call fast float @_Z12native_rsqrtf(float %tmp) +; GCN-NATIVE: call fast float @_Z12native_rsqrtf(float %tmp) define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z5rsqrtf(float %tmp) + %call = call fast float @_Z5rsqrtf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -667,11 +667,11 @@ declare float @_Z5rsqrtf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan -; GCN-NATIVE: tail call fast float @_Z10native_tanf(float %tmp) +; GCN-NATIVE: call fast float @_Z10native_tanf(float %tmp) define amdgpu_kernel void @test_use_native_tan(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3tanf(float %tmp) + %call = call fast float @_Z3tanf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -679,14 +679,14 @@ declare float @_Z3tanf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos -; GCN-NATIVE: tail call float @_Z10native_sinf(float %tmp) -; GCN-NATIVE: tail call float @_Z10native_cosf(float %tmp) +; GCN-NATIVE: call float @_Z10native_sinf(float %tmp) +; GCN-NATIVE: call float @_Z10native_cosf(float %tmp) define amdgpu_kernel void @test_use_native_sincos(float addrspace(1)* %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp1 = addrspacecast float addrspace(1)* %arrayidx1 to float* - %call = tail call fast float @_Z6sincosfPf(float %tmp, float* %tmp1) + %call = call fast float @_Z6sincosfPf(float %tmp, float* %tmp1) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -703,10 +703,10 @@ entry: %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8* - %tmp2 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 - %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) - %tmp4 = tail call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 - tail call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) + %tmp2 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 + %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) + %tmp4 = call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 + call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) ret void } @@ -725,10 +725,10 @@ entry: %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8* - %tmp2 = tail call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 - %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0 - %tmp4 = tail call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 - tail call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0 + %tmp2 = call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 + %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0 + %tmp4 = call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 + call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0 ret void } @@ -755,31 +755,31 @@ define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 { entry: %tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8* - %tmp1 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0 + %tmp1 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0 %tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)* %tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8* - %tmp4 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0 + %tmp4 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0 %tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)* %tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8* - %tmp7 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0 + %tmp7 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0 %tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)* %tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8* - %tmp10 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0 + %tmp10 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0 %tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)* %tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8* - %tmp13 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0 + %tmp13 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0 %tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)* %tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8* - %tmp16 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0 + %tmp16 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0 %tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)* %tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8* - %tmp19 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0 + %tmp19 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0 %tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)* %tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8* - %tmp22 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0 + %tmp22 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0 %tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)* %tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8* - %tmp25 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0 + %tmp25 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0 ret void } diff --git a/llvm/test/Feature/optnone-opt.ll b/llvm/test/Feature/optnone-opt.ll --- a/llvm/test/Feature/optnone-opt.ll +++ b/llvm/test/Feature/optnone-opt.ll @@ -39,16 +39,10 @@ ; IR passes run at -O1 and higher. ; OPT-O1-DAG: Skipping pass 'Aggressive Dead Code Elimination' ; OPT-O1-DAG: Skipping pass 'Combine redundant instructions' -; OPT-O1-DAG: Skipping pass 'Dead Store Elimination' ; OPT-O1-DAG: Skipping pass 'Early CSE' -; OPT-O1-DAG: Skipping pass 'Jump Threading' -; OPT-O1-DAG: Skipping pass 'MemCpy Optimization' ; OPT-O1-DAG: Skipping pass 'Reassociate expressions' ; OPT-O1-DAG: Skipping pass 'Simplify the CFG' ; OPT-O1-DAG: Skipping pass 'Sparse Conditional Constant Propagation' -; OPT-O1-DAG: Skipping pass 'SROA' -; OPT-O1-DAG: Skipping pass 'Tail Call Elimination' -; OPT-O1-DAG: Skipping pass 'Value Propagation' ; Additional IR passes run at -O2 and higher. ; OPT-O2O3-DAG: Skipping pass 'Global Value Numbering' diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -12,66 +12,70 @@ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O1 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 +; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \ +; RUN: --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 +; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ +; RUN: --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Os +; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Os \ +; RUN: --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Oz +; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Oz \ +; RUN: --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='lto-pre-link' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \ -; RUN: --check-prefix=CHECK-O2-LTO +; RUN: --check-prefix=CHECK-O2-LTO --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-peephole='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-PEEPHOLE +; RUN: --check-prefix=CHECK-EP-PEEPHOLE --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-late-loop-optimizations='no-op-loop' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-LOOP-LATE +; RUN: --check-prefix=CHECK-EP-LOOP-LATE --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-loop-optimizer-end='no-op-loop' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-LOOP-END +; RUN: --check-prefix=CHECK-EP-LOOP-END --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-scalar-optimizer-late='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-SCALAR-LATE +; RUN: --check-prefix=CHECK-EP-SCALAR-LATE --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-cgscc-optimizer-late='no-op-cgscc' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-CGSCC-LATE +; RUN: --check-prefix=CHECK-EP-CGSCC-LATE --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-vectorizer-start='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-VECTORIZER-START +; RUN: --check-prefix=CHECK-EP-VECTORIZER-START --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-PIPELINE-START +; RUN: --check-prefix=CHECK-EP-PIPELINE-START --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='lto-pre-link' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-PIPELINE-START +; RUN: --check-prefix=CHECK-EP-PIPELINE-START --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-optimizer-last='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-OPTIMIZER-LAST +; RUN: --check-prefix=CHECK-EP-OPTIMIZER-LAST --check-prefix=CHECK-O23SZ ; CHECK-O: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Starting llvm::Module pass manager run. @@ -132,10 +136,10 @@ ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis -; CHECK-O-NEXT: Running pass: SpeculativeExecutionPass -; CHECK-O-NEXT: Running pass: JumpThreadingPass -; CHECK-O-NEXT: Running analysis: LazyValueAnalysis -; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass +; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass +; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis +; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O3-NEXT: AggressiveInstCombinePass ; CHECK-O-NEXT: Running pass: InstCombinePass @@ -143,7 +147,7 @@ ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass -; CHECK-O-NEXT: Running pass: TailCallElimPass +; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis @@ -180,22 +184,10 @@ ; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass ; CHECK-O-NEXT: Finished Loop pass manager run. ; CHECK-O-NEXT: Running pass: SROA on foo -; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass -; CHECK-Os-NEXT: Running pass: GVN -; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis -; CHECK-Os-NEXT: Running analysis: PhiValuesAnalysis -; CHECK-Oz-NEXT: Running pass: MergedLoadStoreMotionPass -; CHECK-Oz-NEXT: Running pass: GVN -; CHECK-Oz-NEXT: Running analysis: MemoryDependenceAnalysis -; CHECK-Oz-NEXT: Running analysis: PhiValuesAnalysis -; CHECK-O2-NEXT: Running pass: MergedLoadStoreMotionPass -; CHECK-O2-NEXT: Running pass: GVN -; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis -; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis -; CHECK-O3-NEXT: Running pass: MergedLoadStoreMotionPass -; CHECK-O3-NEXT: Running pass: GVN -; CHECK-O3-NEXT: Running analysis: MemoryDependenceAnalysis -; CHECK-O3-NEXT: Running analysis: PhiValuesAnalysis +; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass +; CHECK-O23SZ-NEXT: Running pass: GVN +; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +; CHECK-O23SZ-NEXT: Running analysis: PhiValuesAnalysis ; CHECK-O-NEXT: Running pass: MemCpyOptPass ; CHECK-O1-NEXT: Running analysis: MemoryDependenceAnalysis ; CHECK-O1-NEXT: Running analysis: PhiValuesAnalysis @@ -204,14 +196,14 @@ ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass -; CHECK-O-NEXT: Running pass: JumpThreadingPass -; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass -; CHECK-O-NEXT: Running pass: DSEPass -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> -; CHECK-O-NEXT: Starting llvm::Function pass manager run. -; CHECK-O-NEXT: Running pass: LoopSimplifyPass -; CHECK-O-NEXT: Running pass: LCSSAPass -; CHECK-O-NEXT: Finished llvm::Function pass manager run. +; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass +; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Running pass: DSEPass +; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> +; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run. +; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass +; CHECK-O23SZ-NEXT: Running pass: LCSSAPass +; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run. ; CHECK-EP-SCALAR-LATE-NEXT: Running pass: NoOpFunctionPass ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -13,19 +13,19 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O1 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O2 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S -passes-ep-pipeline-start='no-op-module' %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O3,CHECK-EP-PIPELINE-START +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O3,CHECK-EP-PIPELINE-START ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Os +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Os ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Oz +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Oz ; RUN: opt -disable-verify -debug-pass-manager -new-pm-debug-info-for-profiling \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-PRELINK-O,CHECK-PRELINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O2 ; ; Postlink pipelines: ; RUN: opt -disable-verify -debug-pass-manager \ @@ -33,19 +33,19 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-POSTLINK-O,CHECK-POSTLINK-O1 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-POSTLINK-O,CHECK-POSTLINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-O2 ; RUN: opt -disable-verify -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-POSTLINK-O,CHECK-POSTLINK-O3 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-O3 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-POSTLINK-O,CHECK-POSTLINK-Os +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-Os ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-POSTLINK-O,CHECK-POSTLINK-Oz +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-Oz ; RUN: opt -disable-verify -debug-pass-manager -new-pm-debug-info-for-profiling \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-POSTLINK-O,CHECK-POSTLINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-O2 ; ; CHECK-O: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Starting llvm::Module pass manager run. @@ -112,17 +112,17 @@ ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis -; CHECK-O-NEXT: Running pass: SpeculativeExecutionPass -; CHECK-O-NEXT: Running pass: JumpThreadingPass -; CHECK-O-NEXT: Running analysis: LazyValueAnalysis -; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass +; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass +; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis +; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass -; CHECK-O-NEXT: Running pass: TailCallElimPass +; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis @@ -180,14 +180,14 @@ ; CHECK-O-NEXT: Running pass: BDCEPass ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis ; CHECK-O-NEXT: Running pass: InstCombinePass -; CHECK-O-NEXT: Running pass: JumpThreadingPass -; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass -; CHECK-O-NEXT: Running pass: DSEPass -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> -; CHECK-O-NEXT: Starting llvm::Function pass manager run -; CHECK-O-NEXT: Running pass: LoopSimplifyPass -; CHECK-O-NEXT: Running pass: LCSSAPass -; CHECK-O-NEXT: Finished llvm::Function pass manager run +; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass +; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Running pass: DSEPass +; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> +; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run +; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass +; CHECK-O23SZ-NEXT: Running pass: LCSSAPass +; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll --- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll +++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O1 -S | FileCheck %s +; RUN: opt < %s -O2 -S | FileCheck %s ; performCallSlotOptzn in MemCpy should not exchange the calls to ; @llvm.lifetime.start and @llvm.memcpy. diff --git a/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll b/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll --- a/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll +++ b/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll @@ -7,7 +7,7 @@ define i1 @PR33605(i32 %a, i32 %b, i32* %c) { ; ALL-LABEL: @PR33605( -; ALL-NEXT: for.body: +; ALL-NEXT: entry: ; ALL-NEXT: [[OR:%.*]] = or i32 [[B:%.*]], [[A:%.*]] ; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1 ; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 @@ -15,16 +15,16 @@ ; ALL-NEXT: br i1 [[CMP]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; ALL: if.then: ; ALL-NEXT: store i32 [[OR]], i32* [[ARRAYIDX]], align 4 -; ALL-NEXT: tail call void @foo() +; ALL-NEXT: call void @foo() ; ALL-NEXT: br label [[IF_END]] ; ALL: if.end: -; ALL-NEXT: [[CHANGED_1_OFF0:%.*]] = phi i1 [ true, [[IF_THEN]] ], [ false, [[FOR_BODY:%.*]] ] +; ALL-NEXT: [[CHANGED_1_OFF0:%.*]] = phi i1 [ true, [[IF_THEN]] ], [ false, [[ENTRY:%.*]] ] ; ALL-NEXT: [[TMP1:%.*]] = load i32, i32* [[C]], align 4 ; ALL-NEXT: [[CMP_1:%.*]] = icmp eq i32 [[OR]], [[TMP1]] ; ALL-NEXT: br i1 [[CMP_1]], label [[IF_END_1:%.*]], label [[IF_THEN_1:%.*]] ; ALL: if.then.1: ; ALL-NEXT: store i32 [[OR]], i32* [[C]], align 4 -; ALL-NEXT: tail call void @foo() +; ALL-NEXT: call void @foo() ; ALL-NEXT: br label [[IF_END_1]] ; ALL: if.end.1: ; ALL-NEXT: [[CHANGED_1_OFF0_1:%.*]] = phi i1 [ true, [[IF_THEN_1]] ], [ [[CHANGED_1_OFF0]], [[IF_END]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll b/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll --- a/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll +++ b/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll @@ -74,7 +74,7 @@ define i32 @two_shifts_by_sext_with_extra_use(i32 %val, i8 signext %len) { ; CHECK-LABEL: @two_shifts_by_sext_with_extra_use( ; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[LEN:%.*]] to i32 -; CHECK-NEXT: tail call void @use_int32(i32 [[CONV]]) +; CHECK-NEXT: call void @use_int32(i32 [[CONV]]) ; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[CONV]] ; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SHL]], [[CONV]] ; CHECK-NEXT: ret i32 [[SHR]] @@ -101,7 +101,7 @@ define i32 @two_shifts_by_same_sext_with_extra_use(i32 %val, i8 signext %len) { ; CHECK-LABEL: @two_shifts_by_same_sext_with_extra_use( ; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[LEN:%.*]] to i32 -; CHECK-NEXT: tail call void @use_int32(i32 [[CONV]]) +; CHECK-NEXT: call void @use_int32(i32 [[CONV]]) ; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[CONV]] ; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SHL]], [[CONV]] ; CHECK-NEXT: ret i32 [[SHR]]