diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1499,7 +1499,8 @@ Follow = true; return true; } - if (isa(UserI)) + if (isa(UserI) || isa(UserI) || + isa(UserI)) return true; // Unknown user. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll @@ -8,7 +8,7 @@ ; IS________OPM-LABEL: define {{[^@]+}}@f() { ; IS________OPM-NEXT: entry: ; IS________OPM-NEXT: [[A:%.*]] = alloca i32, align 1 -; IS________OPM-NEXT: call void @g(i32* noalias nocapture noundef nonnull readonly dereferenceable(4) [[A]]) +; IS________OPM-NEXT: call void @g(i32* noalias nocapture nofree noundef nonnull readonly dereferenceable(4) [[A]]) ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@f() { @@ -26,7 +26,7 @@ define internal void @g(i32* %a) { ; IS________OPM-LABEL: define {{[^@]+}}@g -; IS________OPM-SAME: (i32* noalias nocapture noundef nonnull readonly dereferenceable(4) [[A:%.*]]) { +; IS________OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly dereferenceable(4) [[A:%.*]]) { ; IS________OPM-NEXT: [[AA:%.*]] = load i32, i32* [[A]], align 1 ; IS________OPM-NEXT: call void @z(i32 [[AA]]) ; IS________OPM-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll @@ -8,7 +8,7 @@ define internal void @test(i32** %X) !dbg !2 { ; CHECK-LABEL: define {{[^@]+}}@test -; CHECK-SAME: (i32** nocapture noundef nonnull readonly align 8 dereferenceable(8) [[X:%.*]]) !dbg [[DBG3:![0-9]+]] { +; CHECK-SAME: (i32** nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[X:%.*]]) !dbg [[DBG3:![0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 ; CHECK-NEXT: call void @sink(i32 [[TMP2]]) @@ -33,14 +33,14 @@ define void @caller(i32** %Y, %struct.pair* %P) { ; IS__TUNIT____-LABEL: define {{[^@]+}}@caller -; IS__TUNIT____-SAME: (i32** nocapture readonly [[Y:%.*]], %struct.pair* nocapture nofree readnone [[P:%.*]]) { -; IS__TUNIT____-NEXT: call void @test(i32** nocapture readonly align 8 [[Y]]), !dbg [[DBG4:![0-9]+]] +; IS__TUNIT____-SAME: (i32** nocapture nofree readonly [[Y:%.*]], %struct.pair* nocapture nofree readnone [[P:%.*]]) { +; IS__TUNIT____-NEXT: call void @test(i32** nocapture nofree readonly align 8 [[Y]]), !dbg [[DBG4:![0-9]+]] ; IS__TUNIT____-NEXT: call void @test_byval(), !dbg [[DBG5:![0-9]+]] ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller -; IS__CGSCC____-SAME: (i32** nocapture noundef nonnull readonly align 8 dereferenceable(8) [[Y:%.*]], %struct.pair* nocapture nofree readnone [[P:%.*]]) { -; IS__CGSCC____-NEXT: call void @test(i32** nocapture noundef nonnull readonly align 8 dereferenceable(8) [[Y]]), !dbg [[DBG4:![0-9]+]] +; IS__CGSCC____-SAME: (i32** nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[Y:%.*]], %struct.pair* nocapture nofree readnone [[P:%.*]]) { +; IS__CGSCC____-NEXT: call void @test(i32** nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[Y]]), !dbg [[DBG4:![0-9]+]] ; IS__CGSCC____-NEXT: call void @test_byval(), !dbg [[DBG5:![0-9]+]] ; IS__CGSCC____-NEXT: ret void ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll @@ -25,7 +25,7 @@ ; IS________OPM-LABEL: define {{[^@]+}}@run ; IS________OPM-SAME: () #[[ATTR0:[0-9]+]] { ; IS________OPM-NEXT: entry: -; IS________OPM-NEXT: [[TMP0:%.*]] = call i64 @CaptureAStruct(%struct.Foo* nocapture nofree noundef nonnull readonly byval(%struct.Foo) align 8 dereferenceable(16) @a) #[[ATTR0]] +; IS________OPM-NEXT: [[TMP0:%.*]] = call i64 @CaptureAStruct(%struct.Foo* nocapture nofree noundef nonnull readonly byval([[STRUCT_FOO:%.*]]) align 8 dereferenceable(16) @a) #[[ATTR0]] ; IS________OPM-NEXT: unreachable ; ; IS__TUNIT_NPM: Function Attrs: nofree noreturn nosync nounwind readnone diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll @@ -11,7 +11,7 @@ ; IS________OPM-LABEL: define {{[^@]+}}@caller() { ; IS________OPM-NEXT: [[X:%.*]] = alloca i32, align 4 ; IS________OPM-NEXT: store i32 42, i32* [[X]], align 4 -; IS________OPM-NEXT: call void @promote_i32_ptr(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[X]]), !prof [[PROF0:![0-9]+]] +; IS________OPM-NEXT: call void @promote_i32_ptr(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X]]), !prof [[PROF0:![0-9]+]] ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@caller() { @@ -29,7 +29,7 @@ define internal void @promote_i32_ptr(i32* %xp) { ; IS________OPM-LABEL: define {{[^@]+}}@promote_i32_ptr -; IS________OPM-SAME: (i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[XP:%.*]]) { +; IS________OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[XP:%.*]]) { ; IS________OPM-NEXT: [[X:%.*]] = load i32, i32* [[XP]], align 4 ; IS________OPM-NEXT: call void @use_i32(i32 [[X]]) ; IS________OPM-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll @@ -52,19 +52,28 @@ ; IS__TUNIT_OPM-NEXT: call void @bar(%pair* noalias nocapture nonnull readonly byval([[PAIR]]) dereferenceable(8) [[DATA]]) ; IS__TUNIT_OPM-NEXT: ret void ; -; IS________NPM-LABEL: define {{[^@]+}}@zed -; IS________NPM-SAME: (%pair* noalias nocapture nonnull readonly byval([[PAIR:%.*]]) dereferenceable(8) [[DATA:%.*]]) { -; IS________NPM-NEXT: [[DATA_CAST:%.*]] = bitcast %pair* [[DATA]] to i32* -; IS________NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[DATA_CAST]], align 1 -; IS________NPM-NEXT: [[DATA_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1 -; IS________NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[DATA_0_1]], align 1 -; IS________NPM-NEXT: call void @bar(i32 [[TMP1]], i32 [[TMP2]]) -; IS________NPM-NEXT: ret void +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@zed +; IS__TUNIT_NPM-SAME: (%pair* noalias nocapture nonnull readonly byval([[PAIR:%.*]]) dereferenceable(8) [[DATA:%.*]]) { +; IS__TUNIT_NPM-NEXT: [[DATA_CAST:%.*]] = bitcast %pair* [[DATA]] to i32* +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[DATA_CAST]], align 1 +; IS__TUNIT_NPM-NEXT: [[DATA_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1 +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[DATA_0_1]], align 1 +; IS__TUNIT_NPM-NEXT: call void @bar(i32 [[TMP1]], i32 [[TMP2]]) +; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@zed ; IS__CGSCC_OPM-SAME: (%pair* noalias nocapture noundef nonnull readonly byval([[PAIR:%.*]]) dereferenceable(8) [[DATA:%.*]]) { ; IS__CGSCC_OPM-NEXT: call void @bar(%pair* noalias nocapture noundef nonnull readonly byval([[PAIR]]) dereferenceable(8) [[DATA]]) ; IS__CGSCC_OPM-NEXT: ret void +; +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@zed +; IS__CGSCC_NPM-SAME: (%pair* noalias nocapture nofree nonnull readonly byval([[PAIR:%.*]]) dereferenceable(8) [[DATA:%.*]]) { +; IS__CGSCC_NPM-NEXT: [[DATA_CAST:%.*]] = bitcast %pair* [[DATA]] to i32* +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[DATA_CAST]], align 1 +; IS__CGSCC_NPM-NEXT: [[DATA_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1 +; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[DATA_0_1]], align 1 +; IS__CGSCC_NPM-NEXT: call void @bar(i32 [[TMP1]], i32 [[TMP2]]) +; IS__CGSCC_NPM-NEXT: ret void ; call void @bar(%pair* byval(%pair) %Data) ret void diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll @@ -41,7 +41,7 @@ ; IS__TUNIT_OPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 ; IS__TUNIT_OPM-NEXT: store float 3.000000e+00, float* [[P]], align 4 ; IS__TUNIT_OPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4 -; IS__TUNIT_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef) +; IS__TUNIT_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@foo @@ -52,7 +52,7 @@ ; IS__TUNIT_NPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 ; IS__TUNIT_NPM-NEXT: store float 3.000000e+00, float* [[P]], align 4 ; IS__TUNIT_NPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4 -; IS__TUNIT_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef) +; IS__TUNIT_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@foo @@ -63,7 +63,7 @@ ; IS__CGSCC_OPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 ; IS__CGSCC_OPM-NEXT: store float 3.000000e+00, float* [[P]], align 4 ; IS__CGSCC_OPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4 -; IS__CGSCC_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 noundef 4617315517961601024) +; IS__CGSCC_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 noundef 4617315517961601024) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@foo @@ -74,7 +74,7 @@ ; IS__CGSCC_NPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 ; IS__CGSCC_NPM-NEXT: store float 3.000000e+00, float* [[P]], align 4 ; IS__CGSCC_NPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4 -; IS__CGSCC_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 noundef 4617315517961601024) +; IS__CGSCC_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 noundef 4617315517961601024) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -89,7 +89,7 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %N, float* dereferenceable(4) %p, i64 %q) { ; IS________OPM-LABEL: define {{[^@]+}}@.omp_outlined. -; IS________OPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) { +; IS________OPM-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) { ; IS________OPM-NEXT: entry: ; IS________OPM-NEXT: [[Q_ADDR:%.*]] = alloca i64, align 8 ; IS________OPM-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -150,7 +150,7 @@ ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@.omp_outlined. -; IS________NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) { +; IS________NPM-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) { ; IS________NPM-NEXT: entry: ; IS________NPM-NEXT: [[Q_ADDR:%.*]] = alloca i64, align 8 ; IS________NPM-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll --- a/llvm/test/Transforms/Attributor/callbacks.ll +++ b/llvm/test/Transforms/Attributor/callbacks.ll @@ -25,7 +25,7 @@ ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t0_caller @@ -37,7 +37,7 @@ ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t0_caller @@ -49,7 +49,7 @@ ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 noundef 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 noundef 99, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t0_caller @@ -61,7 +61,7 @@ ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 noundef 99, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -80,7 +80,7 @@ define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { ; ; IS________OPM-LABEL: define {{[^@]+}}@t0_callback_callee -; IS________OPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) { +; IS________OPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) { ; IS________OPM-NEXT: entry: ; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -89,7 +89,7 @@ ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@t0_callback_callee -; IS________NPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) { +; IS________NPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) { ; IS________NPM-NEXT: entry: ; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -125,7 +125,7 @@ ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t1_caller @@ -137,7 +137,7 @@ ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t1_caller @@ -149,7 +149,7 @@ ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t1_caller @@ -161,7 +161,7 @@ ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -181,7 +181,7 @@ ; ; IS________OPM: Function Attrs: nosync ; IS________OPM-LABEL: define {{[^@]+}}@t1_callback_callee -; IS________OPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; IS________OPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; IS________OPM-NEXT: entry: ; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -191,7 +191,7 @@ ; ; IS________NPM: Function Attrs: nosync ; IS________NPM-LABEL: define {{[^@]+}}@t1_callback_callee -; IS________NPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; IS________NPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; IS________NPM-NEXT: entry: ; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -225,7 +225,7 @@ ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t2_caller @@ -237,7 +237,7 @@ ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t2_caller @@ -249,7 +249,7 @@ ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t2_caller @@ -261,7 +261,7 @@ ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -282,7 +282,7 @@ define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { ; ; IS________OPM-LABEL: define {{[^@]+}}@t2_callback_callee -; IS________OPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) { +; IS________OPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) { ; IS________OPM-NEXT: entry: ; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -291,7 +291,7 @@ ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@t2_callback_callee -; IS________NPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) { +; IS________NPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) { ; IS________NPM-NEXT: entry: ; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -325,8 +325,8 @@ ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t3_caller @@ -338,8 +338,8 @@ ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t3_caller @@ -351,8 +351,8 @@ ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t3_caller @@ -364,8 +364,8 @@ ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -387,7 +387,7 @@ define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { ; ; IS________OPM-LABEL: define {{[^@]+}}@t3_callback_callee -; IS________OPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) { +; IS________OPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) { ; IS________OPM-NEXT: entry: ; IS________OPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS________OPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -396,7 +396,7 @@ ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@t3_callback_callee -; IS________NPM-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) { +; IS________NPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) { ; IS________NPM-NEXT: entry: ; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -713,7 +713,7 @@ define void @test16b(i8 %v, i8** %P) { ; CHECK-LABEL: define {{[^@]+}}@test16b -; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) { +; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) ; CHECK-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 ; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) @@ -729,7 +729,7 @@ define void @test16c(i8 %v, i8** %P) { ; IS________OPM-LABEL: define {{[^@]+}}@test16c -; IS________OPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) { +; IS________OPM-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) { ; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) ; IS________OPM-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 ; IS________OPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) #[[ATTR5]] @@ -737,7 +737,7 @@ ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test16c -; IS________NPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) { +; IS________NPM-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) { ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 ; IS________NPM-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 ; IS________NPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) #[[ATTR6]] @@ -752,7 +752,7 @@ define void @test16d(i8 %v, i8** %P) { ; CHECK-LABEL: define {{[^@]+}}@test16d -; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) { +; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) ; CHECK-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/memory_locations.ll b/llvm/test/Transforms/Attributor/memory_locations.ll --- a/llvm/test/Transforms/Attributor/memory_locations.ll +++ b/llvm/test/Transforms/Attributor/memory_locations.ll @@ -178,7 +178,7 @@ define dso_local i8* @internal_argmem_only_read(i32* %arg) { ; CHECK: Function Attrs: inaccessiblemem_or_argmemonly ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_read -; CHECK-SAME: (i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[ARG]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP]] to i64 @@ -195,7 +195,7 @@ define dso_local i8* @internal_argmem_only_write(i32* %arg) { ; CHECK: Function Attrs: inaccessiblemem_or_argmemonly ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_write -; CHECK-SAME: (i32* nocapture noundef nonnull writeonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 10, i32* [[ARG]], align 4 ; CHECK-NEXT: [[CALL:%.*]] = call noalias dereferenceable_or_null(10) i8* @malloc(i64 noundef 10) @@ -210,16 +210,16 @@ define dso_local i8* @internal_argmem_only_rec(i32* %arg) { ; IS__TUNIT____: Function Attrs: inaccessiblemem_or_argmemonly ; IS__TUNIT____-LABEL: define {{[^@]+}}@internal_argmem_only_rec -; IS__TUNIT____-SAME: (i32* nocapture [[ARG:%.*]]) #[[ATTR1]] { +; IS__TUNIT____-SAME: (i32* nocapture nofree [[ARG:%.*]]) #[[ATTR1]] { ; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture align 4 [[ARG]]) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture nofree align 4 [[ARG]]) ; IS__TUNIT____-NEXT: ret i8* [[CALL]] ; ; IS__CGSCC____: Function Attrs: inaccessiblemem_or_argmemonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@internal_argmem_only_rec -; IS__CGSCC____-SAME: (i32* nocapture noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { ; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture noundef nonnull align 4 dereferenceable(4) [[ARG]]) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG]]) ; IS__CGSCC____-NEXT: ret i8* [[CALL]] ; entry: @@ -230,7 +230,7 @@ define internal i8* @internal_argmem_only_rec_1(i32* %arg) { ; IS__TUNIT____: Function Attrs: inaccessiblemem_or_argmemonly ; IS__TUNIT____-LABEL: define {{[^@]+}}@internal_argmem_only_rec_1 -; IS__TUNIT____-SAME: (i32* nocapture noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +; IS__TUNIT____-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP:%.*]] = load i32, i32* [[ARG]], align 4 ; IS__TUNIT____-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP]], 0 @@ -243,7 +243,7 @@ ; IS__TUNIT____-NEXT: br i1 [[CMP1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]] ; IS__TUNIT____: if.then2: ; IS__TUNIT____-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARG]], i64 -1 -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_2(i32* nocapture nonnull align 4 dereferenceable(4) [[ADD_PTR]]) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_2(i32* nocapture nofree nonnull align 4 dereferenceable(4) [[ADD_PTR]]) ; IS__TUNIT____-NEXT: br label [[RETURN]] ; IS__TUNIT____: if.end3: ; IS__TUNIT____-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARG]], align 4 @@ -256,7 +256,7 @@ ; ; IS__CGSCC____: Function Attrs: inaccessiblemem_or_argmemonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@internal_argmem_only_rec_1 -; IS__CGSCC____-SAME: (i32* nocapture noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[TMP:%.*]] = load i32, i32* [[ARG]], align 4 ; IS__CGSCC____-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP]], 0 @@ -269,7 +269,7 @@ ; IS__CGSCC____-NEXT: br i1 [[CMP1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]] ; IS__CGSCC____: if.then2: ; IS__CGSCC____-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARG]], i64 -1 -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_2(i32* nocapture noundef nonnull align 4 dereferenceable(4) [[ADD_PTR]]) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_2(i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ADD_PTR]]) ; IS__CGSCC____-NEXT: br label [[RETURN]] ; IS__CGSCC____: if.end3: ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARG]], align 4 @@ -312,11 +312,11 @@ define internal i8* @internal_argmem_only_rec_2(i32* %arg) { ; CHECK: Function Attrs: inaccessiblemem_or_argmemonly ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_rec_2 -; CHECK-SAME: (i32* nocapture noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +; CHECK-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 0, i32* [[ARG]], align 4 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARG]], i64 -1 -; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture nonnull align 4 dereferenceable(4) [[ADD_PTR]]) +; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture nofree nonnull align 4 dereferenceable(4) [[ADD_PTR]]) ; CHECK-NEXT: ret i8* [[CALL]] ; entry: diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -854,13 +854,13 @@ define void @ptr_uses(i8* %ptr, i8* %wptr) { ; IS__TUNIT____: Function Attrs: nounwind willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@ptr_uses -; IS__TUNIT____-SAME: (i8* [[PTR:%.*]], i8* nocapture noundef nonnull writeonly dereferenceable(1) [[WPTR:%.*]]) #[[ATTR12:[0-9]+]] { +; IS__TUNIT____-SAME: (i8* [[PTR:%.*]], i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[WPTR:%.*]]) #[[ATTR12:[0-9]+]] { ; IS__TUNIT____-NEXT: store i8 0, i8* [[WPTR]], align 1 ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nounwind willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@ptr_uses -; IS__CGSCC____-SAME: (i8* [[PTR:%.*]], i8* nocapture noundef nonnull writeonly dereferenceable(1) [[WPTR:%.*]]) #[[ATTR14]] { +; IS__CGSCC____-SAME: (i8* [[PTR:%.*]], i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[WPTR:%.*]]) #[[ATTR14]] { ; IS__CGSCC____-NEXT: store i8 0, i8* [[WPTR]], align 1 ; IS__CGSCC____-NEXT: ret void ; diff --git a/llvm/test/Transforms/Attributor/nofree.ll b/llvm/test/Transforms/Attributor/nofree.ll --- a/llvm/test/Transforms/Attributor/nofree.ll +++ b/llvm/test/Transforms/Attributor/nofree.ll @@ -347,7 +347,7 @@ define noalias i32* @test13(i64* nocapture readonly %a) { ; CHECK: Function Attrs: nounwind ; CHECK-LABEL: define {{[^@]+}}@test13 -; CHECK-SAME: (i64* nocapture noundef nonnull readonly align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i64* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[A]], align 8 ; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @malloc(i64 [[TMP0]]) #[[ATTR2]] diff --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll --- a/llvm/test/Transforms/Attributor/nonnull.ll +++ b/llvm/test/Transforms/Attributor/nonnull.ll @@ -909,7 +909,7 @@ define i8 @parent6(i8* %a, i8* %b) { ; CHECK-LABEL: define {{[^@]+}}@parent6 -; CHECK-SAME: (i8* nonnull [[A:%.*]], i8* noundef [[B:%.*]]) { +; CHECK-SAME: (i8* nonnull [[A:%.*]], i8* nofree noundef [[B:%.*]]) { ; CHECK-NEXT: [[C:%.*]] = load volatile i8, i8* [[B]], align 1 ; CHECK-NEXT: call void @use1nonnull(i8* nonnull [[A]]) ; CHECK-NEXT: ret i8 [[C]] diff --git a/llvm/test/Transforms/Attributor/norecurse.ll b/llvm/test/Transforms/Attributor/norecurse.ll --- a/llvm/test/Transforms/Attributor/norecurse.ll +++ b/llvm/test/Transforms/Attributor/norecurse.ll @@ -73,7 +73,7 @@ ; CHECK: Function Attrs: argmemonly nofree nosync nounwind willreturn ; CHECK-LABEL: define {{[^@]+}}@intrinsic ; CHECK-SAME: (i8* nocapture nofree writeonly [[DEST:%.*]], i8* nocapture nofree readonly [[SRC:%.*]], i32 [[LEN:%.*]]) #[[ATTR5:[0-9]+]] { -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree writeonly [[DEST]], i8* noalias nocapture nofree readonly [[SRC]], i32 [[LEN]], i1 noundef false) #[[ATTR10:[0-9]+]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree writeonly [[DEST]], i8* noalias nocapture nofree readonly [[SRC]], i32 [[LEN]], i1 noundef false) #[[ATTR11:[0-9]+]] ; CHECK-NEXT: ret void ; call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 false) @@ -93,7 +93,7 @@ ; ; IS__CGSCC____: Function Attrs: norecurse nosync readnone ; IS__CGSCC____-LABEL: define {{[^@]+}}@called_by_norecurse -; IS__CGSCC____-SAME: () #[[ATTR6:[0-9]+]] { +; IS__CGSCC____-SAME: () #[[ATTR7:[0-9]+]] { ; IS__CGSCC____-NEXT: [[A:%.*]] = call i32 @k() ; IS__CGSCC____-NEXT: ret i32 undef ; @@ -103,13 +103,13 @@ define void @m() norecurse { ; IS__TUNIT____: Function Attrs: norecurse nosync readnone ; IS__TUNIT____-LABEL: define {{[^@]+}}@m -; IS__TUNIT____-SAME: () #[[ATTR6:[0-9]+]] { +; IS__TUNIT____-SAME: () #[[ATTR7:[0-9]+]] { ; IS__TUNIT____-NEXT: [[A:%.*]] = call i32 @called_by_norecurse() #[[ATTR3]] ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: norecurse nosync readnone ; IS__CGSCC____-LABEL: define {{[^@]+}}@m -; IS__CGSCC____-SAME: () #[[ATTR6]] { +; IS__CGSCC____-SAME: () #[[ATTR7]] { ; IS__CGSCC____-NEXT: [[A:%.*]] = call i32 @called_by_norecurse() ; IS__CGSCC____-NEXT: ret void ; @@ -136,7 +136,7 @@ ; ; IS__CGSCC____: Function Attrs: norecurse nosync readnone ; IS__CGSCC____-LABEL: define {{[^@]+}}@o -; IS__CGSCC____-SAME: () #[[ATTR6]] { +; IS__CGSCC____-SAME: () #[[ATTR7]] { ; IS__CGSCC____-NEXT: [[A:%.*]] = call i32 @called_by_norecurse_indirectly() ; IS__CGSCC____-NEXT: ret i32 [[A]] ; @@ -146,13 +146,13 @@ define i32 @p() norecurse { ; IS__TUNIT____: Function Attrs: norecurse nosync readnone ; IS__TUNIT____-LABEL: define {{[^@]+}}@p -; IS__TUNIT____-SAME: () #[[ATTR6]] { +; IS__TUNIT____-SAME: () #[[ATTR7]] { ; IS__TUNIT____-NEXT: [[A:%.*]] = call i32 @o() #[[ATTR3]] ; IS__TUNIT____-NEXT: ret i32 [[A]] ; ; IS__CGSCC____: Function Attrs: norecurse nosync readnone ; IS__CGSCC____-LABEL: define {{[^@]+}}@p -; IS__CGSCC____-SAME: () #[[ATTR6]] { +; IS__CGSCC____-SAME: () #[[ATTR7]] { ; IS__CGSCC____-NEXT: [[A:%.*]] = call i32 @o() ; IS__CGSCC____-NEXT: ret i32 [[A]] ; @@ -163,7 +163,7 @@ define void @f(i32 %x) { ; CHECK: Function Attrs: nofree nosync nounwind readnone ; CHECK-LABEL: define {{[^@]+}}@f -; CHECK-SAME: (i32 [[X:%.*]]) #[[ATTR7:[0-9]+]] { +; CHECK-SAME: (i32 [[X:%.*]]) #[[ATTR8:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 @@ -171,7 +171,7 @@ ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: call void @g() #[[ATTR8:[0-9]+]] +; CHECK-NEXT: call void @g() #[[ATTR9:[0-9]+]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: ret void @@ -194,9 +194,9 @@ define void @g() norecurse { ; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone ; CHECK-LABEL: define {{[^@]+}}@g -; CHECK-SAME: () #[[ATTR8]] { +; CHECK-SAME: () #[[ATTR9]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @f(i32 noundef 0) #[[ATTR7]] +; CHECK-NEXT: call void @f(i32 noundef 0) #[[ATTR8]] ; CHECK-NEXT: ret void ; entry: @@ -225,7 +225,7 @@ define i32 @eval_func2(i32 (i32)* , i32) local_unnamed_addr null_pointer_is_valid{ ; CHECK: Function Attrs: null_pointer_is_valid ; CHECK-LABEL: define {{[^@]+}}@eval_func2 -; CHECK-SAME: (i32 (i32)* nocapture nofree noundef [[TMP0:%.*]], i32 [[TMP1:%.*]]) local_unnamed_addr #[[ATTR9:[0-9]+]] { +; CHECK-SAME: (i32 (i32)* nocapture nofree noundef [[TMP0:%.*]], i32 [[TMP1:%.*]]) local_unnamed_addr #[[ATTR10:[0-9]+]] { ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 [[TMP0]](i32 [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP3]] ; @@ -263,11 +263,12 @@ ; IS__TUNIT____: attributes #[[ATTR3]] = { nosync readnone } ; IS__TUNIT____: attributes #[[ATTR4:[0-9]+]] = { readnone } ; IS__TUNIT____: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind willreturn } -; IS__TUNIT____: attributes #[[ATTR6]] = { norecurse nosync readnone } -; IS__TUNIT____: attributes #[[ATTR7]] = { nofree nosync nounwind readnone } -; IS__TUNIT____: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind readnone } -; IS__TUNIT____: attributes #[[ATTR9]] = { null_pointer_is_valid } -; IS__TUNIT____: attributes #[[ATTR10]] = { willreturn } +; IS__TUNIT____: attributes #[[ATTR6:[0-9]+]] = { argmemonly nofree nounwind willreturn } +; IS__TUNIT____: attributes #[[ATTR7]] = { norecurse nosync readnone } +; IS__TUNIT____: attributes #[[ATTR8]] = { nofree nosync nounwind readnone } +; IS__TUNIT____: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone } +; IS__TUNIT____: attributes #[[ATTR10]] = { null_pointer_is_valid } +; IS__TUNIT____: attributes #[[ATTR11]] = { willreturn } ;. ; IS__CGSCC____: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } ; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } @@ -275,9 +276,10 @@ ; IS__CGSCC____: attributes #[[ATTR3]] = { nosync readnone } ; IS__CGSCC____: attributes #[[ATTR4:[0-9]+]] = { readnone } ; IS__CGSCC____: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind willreturn } -; IS__CGSCC____: attributes #[[ATTR6]] = { norecurse nosync readnone } -; IS__CGSCC____: attributes #[[ATTR7]] = { nofree nosync nounwind readnone } -; IS__CGSCC____: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind readnone } -; IS__CGSCC____: attributes #[[ATTR9]] = { null_pointer_is_valid } -; IS__CGSCC____: attributes #[[ATTR10]] = { willreturn } +; IS__CGSCC____: attributes #[[ATTR6:[0-9]+]] = { argmemonly nofree nounwind willreturn } +; IS__CGSCC____: attributes #[[ATTR7]] = { norecurse nosync readnone } +; IS__CGSCC____: attributes #[[ATTR8]] = { nofree nosync nounwind readnone } +; IS__CGSCC____: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone } +; IS__CGSCC____: attributes #[[ATTR10]] = { null_pointer_is_valid } +; IS__CGSCC____: attributes #[[ATTR11]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll --- a/llvm/test/Transforms/Attributor/range.ll +++ b/llvm/test/Transforms/Attributor/range.ll @@ -2419,7 +2419,7 @@ define void @spam(i32* %arg, i32* %arg1) { ; CHECK-LABEL: define {{[^@]+}}@spam -; CHECK-SAME: (i32* nocapture noundef nonnull readonly align 8 dereferenceable(4) [[ARG:%.*]], i32* nocapture nofree readnone [[ARG1:%.*]]) { +; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 8 dereferenceable(4) [[ARG:%.*]], i32* nocapture nofree readnone [[ARG1:%.*]]) { ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[ARG]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP]], 4 diff --git a/llvm/test/Transforms/Attributor/readattrs.ll b/llvm/test/Transforms/Attributor/readattrs.ll --- a/llvm/test/Transforms/Attributor/readattrs.ll +++ b/llvm/test/Transforms/Attributor/readattrs.ll @@ -96,7 +96,7 @@ ; This is not a missed optz'n. define void @test6_2(i8** %p, i8* %q) { ; CHECK-LABEL: define {{[^@]+}}@test6_2 -; CHECK-SAME: (i8** nocapture noundef nonnull writeonly align 8 dereferenceable(8) [[P:%.*]], i8* [[Q:%.*]]) { +; CHECK-SAME: (i8** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[P:%.*]], i8* nofree [[Q:%.*]]) { ; CHECK-NEXT: store i8* [[Q]], i8** [[P]], align 8 ; CHECK-NEXT: call void @test6_1() ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll --- a/llvm/test/Transforms/Attributor/value-simplify.ll +++ b/llvm/test/Transforms/Attributor/value-simplify.ll @@ -522,13 +522,13 @@ ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@complicated_args_sret ; IS__TUNIT_OPM-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]]) #[[ATTR3]] { -; IS__TUNIT_OPM-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret(%struct.X) align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR6:[0-9]+]] +; IS__TUNIT_OPM-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret([[STRUCT_X:%.*]]) align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR6:[0-9]+]] ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@complicated_args_sret ; IS__TUNIT_NPM-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]]) #[[ATTR3]] { -; IS__TUNIT_NPM-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret(%struct.X) align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR5:[0-9]+]] +; IS__TUNIT_NPM-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret([[STRUCT_X:%.*]]) align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR5:[0-9]+]] ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly diff --git a/llvm/test/Transforms/Attributor/willreturn.ll b/llvm/test/Transforms/Attributor/willreturn.ll --- a/llvm/test/Transforms/Attributor/willreturn.ll +++ b/llvm/test/Transforms/Attributor/willreturn.ll @@ -399,7 +399,7 @@ define void @conditional_exit(i32 %0, i32* nocapture readonly %1) local_unnamed_addr #0 { ; IS__TUNIT____: Function Attrs: noinline nounwind uwtable ; IS__TUNIT____-LABEL: define {{[^@]+}}@conditional_exit -; IS__TUNIT____-SAME: (i32 [[TMP0:%.*]], i32* nocapture readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { +; IS__TUNIT____-SAME: (i32 [[TMP0:%.*]], i32* nocapture nofree readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { ; IS__TUNIT____-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 ; IS__TUNIT____-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] ; IS__TUNIT____: 4: @@ -417,7 +417,7 @@ ; ; IS__CGSCC____: Function Attrs: noinline nounwind uwtable ; IS__CGSCC____-LABEL: define {{[^@]+}}@conditional_exit -; IS__CGSCC____-SAME: (i32 [[TMP0:%.*]], i32* nocapture readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { +; IS__CGSCC____-SAME: (i32 [[TMP0:%.*]], i32* nocapture nofree readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] { ; IS__CGSCC____-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 ; IS__CGSCC____-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] ; IS__CGSCC____: 4: diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll --- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -27,9 +27,17 @@ define void @delete_parallel_0() { ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_0() { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@delete_parallel_0() { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@delete_parallel_0() { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) +; CHECK2-NEXT: ret void entry: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined.willreturn.0 to void (i32*, i32*, ...)*)) @@ -40,11 +48,21 @@ define internal void @.omp_outlined.willreturn(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR0:#.*]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @unknown() [[ATTR0]] +; CHECK-NEXT: call void @unknown() #[[ATTR0]] ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: call void @unknown() #[[ATTR0]] +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: call void @unknown() #[[ATTR0]] +; CHECK2-NEXT: ret void entry: call void @unknown() willreturn ret void @@ -52,11 +70,21 @@ define internal void @.omp_outlined.willreturn.0(i32* noalias %.global_tid., i32* noalias %.bound_tid.) willreturn { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR1:#.*]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @readonly() [[ATTR4:#.*]] +; CHECK-NEXT: call void @readonly() #[[ATTR4:[0-9]+]] ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: call void @readonly() #[[ATTR4:[0-9]+]] +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: call void @readonly() #[[ATTR4:[0-9]+]] +; CHECK2-NEXT: ret void entry: call void @readonly() ret void @@ -64,11 +92,21 @@ define internal void @.omp_outlined.willreturn.1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.1 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR2:#.*]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @readnone() [[ATTR0]] +; CHECK-NEXT: call void @readnone() #[[ATTR0]] ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.1 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: call void @readnone() #[[ATTR0]] +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.1 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: call void @readnone() #[[ATTR0]] +; CHECK2-NEXT: ret void entry: call void @readnone() willreturn ret void @@ -76,10 +114,18 @@ define internal void @.omp_outlined.willreturn.2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR3:#.*]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.2 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.2 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: ret void entry: ret void } @@ -99,11 +145,23 @@ define void @delete_parallel_1() { ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_1() { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@delete_parallel_1() { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@delete_parallel_1() { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +; CHECK2-NEXT: ret void entry: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) @@ -119,6 +177,16 @@ ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: call void @unknown() +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: call void @unknown() +; CHECK2-NEXT: ret void entry: call void @unknown() ret void @@ -126,11 +194,21 @@ define internal void @.omp_outlined..0(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..0 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR4]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @readonly() [[ATTR4]] +; CHECK-NEXT: call void @readonly() #[[ATTR4]] ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..0 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: call void @readonly() #[[ATTR4]] +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..0 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: call void @readonly() #[[ATTR4]] +; CHECK2-NEXT: ret void entry: call void @readonly() ret void @@ -138,11 +216,21 @@ define internal void @.omp_outlined..1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR5:#.*]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @readnone() ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: call void @readnone() +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: call void @readnone() +; CHECK2-NEXT: ret void entry: call void @readnone() ret void @@ -150,10 +238,18 @@ define internal void @.omp_outlined..2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR3]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: ret void entry: ret void } @@ -188,16 +284,42 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP:%.*]] = bitcast i32* [[A]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) [[ATTR0]] +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR0]] ; CHECK-NEXT: store i32 0, i32* [[A]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@delete_parallel_2() { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP:%.*]] = bitcast i32* [[A]] to i8* +; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR0]] +; CHECK1-NEXT: store i32 0, i32* [[A]], align 4 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@delete_parallel_2() { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP:%.*]] = bitcast i32* [[A]] to i8* +; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR0]] +; CHECK2-NEXT: store i32 0, i32* [[A]], align 4 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) +; CHECK2-NEXT: ret void entry: %a = alloca i32, align 4 %tmp = bitcast i32* %a to i8* @@ -214,9 +336,9 @@ define internal void @.omp_outlined..3(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) [[ATTR6:#.*]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() [[ATTR12:#.*]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR14:[0-9]+]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: @@ -227,6 +349,32 @@ ; CHECK: if.end: ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR14:[0-9]+]] +; CHECK1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; CHECK1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK1: if.then: +; CHECK1-NEXT: [[TMP:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP]], 1 +; CHECK1-NEXT: store i32 [[INC]], i32* [[A]], align 4 +; CHECK1-NEXT: br label [[IF_END]] +; CHECK1: if.end: +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR14:[0-9]+]] +; CHECK2-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; CHECK2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK2: if.then: +; CHECK2-NEXT: [[TMP:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP]], 1 +; CHECK2-NEXT: store i32 [[INC]], i32* [[A]], align 4 +; CHECK2-NEXT: br label [[IF_END]] +; CHECK2: if.end: +; CHECK2-NEXT: ret void entry: %call = call i32 @omp_get_thread_num() %cmp = icmp eq i32 %call, 0 @@ -244,21 +392,51 @@ define internal void @.omp_outlined..4(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 -; CHECK-SAME: (i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull [[GLOB0]], i32 [[TMP]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] ; CHECK: omp_if.then: ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* [[A]], align 4 -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* noundef nonnull [[GLOB0]], i32 [[TMP]]) +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: br label [[OMP_IF_END]] ; CHECK: omp_if.end: ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 +; CHECK1-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK1-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] +; CHECK1: omp_if.then: +; CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK1-NEXT: store i32 [[INC]], i32* [[A]], align 4 +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK1-NEXT: br label [[OMP_IF_END]] +; CHECK1: omp_if.end: +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 +; CHECK2-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK2-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] +; CHECK2: omp_if.then: +; CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK2-NEXT: store i32 [[INC]], i32* [[A]], align 4 +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK2-NEXT: br label [[OMP_IF_END]] +; CHECK2: omp_if.end: +; CHECK2-NEXT: ret void entry: %tmp = load i32, i32* %.global_tid., align 4 %tmp1 = call i32 @__kmpc_master(%struct.ident_t* nonnull @0, i32 %tmp) @@ -286,23 +464,57 @@ define internal void @.omp_outlined..5(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 -; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull [[GLOB0]]) [[ATTR12]] +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR14]] ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull [[GLOB0]], i32 [[TMP]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] ; CHECK: omp_if.then: ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* [[A]], align 4 -; CHECK-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef nonnull [[GLOB0]], i32 [[TMP]]) +; CHECK-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: br label [[OMP_IF_END]] ; CHECK: omp_if.end: -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef nonnull [[GLOB1:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 +; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR14]] +; CHECK1-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK1-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] +; CHECK1: omp_if.then: +; CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK1-NEXT: store i32 [[INC]], i32* [[A]], align 4 +; CHECK1-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK1-NEXT: br label [[OMP_IF_END]] +; CHECK1: omp_if.end: +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 +; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR14]] +; CHECK2-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK2-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] +; CHECK2: omp_if.then: +; CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK2-NEXT: store i32 [[INC]], i32* [[A]], align 4 +; CHECK2-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) +; CHECK2-NEXT: br label [[OMP_IF_END]] +; CHECK2: omp_if.end: +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: ret void entry: %omp_global_thread_num = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) %tmp = load i32, i32* %.global_tid., align 4 @@ -324,18 +536,18 @@ define internal void @.omp_outlined..6(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 -; CHECK-SAME: (i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A1:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 ; CHECK-NEXT: [[TMP:%.*]] = bitcast i32* [[A1]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) [[ATTR0]] +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR0]] ; CHECK-NEXT: store i32 1, i32* [[A1]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32** ; CHECK-NEXT: store i32* [[A1]], i32** [[TMP1]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull [[GLOB2:@.*]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) ; CHECK-NEXT: switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ ; CHECK-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] ; CHECK-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -345,7 +557,7 @@ ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A1]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] ; CHECK-NEXT: store i32 [[ADD]], i32* [[A]], align 4 -; CHECK-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull [[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) ; CHECK-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] ; CHECK: .omp.reduction.case2: ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[A1]], align 4 @@ -356,6 +568,70 @@ ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP9]]) ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 +; CHECK1-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 +; CHECK1-NEXT: [[TMP:%.*]] = bitcast i32* [[A1]] to i8* +; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR0]] +; CHECK1-NEXT: store i32 1, i32* [[A1]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32** +; CHECK1-NEXT: store i32* [[A1]], i32** [[TMP1]], align 8 +; CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK1-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK1-NEXT: switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +; CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +; CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +; CHECK1-NEXT: ] +; CHECK1: .omp.reduction.case1: +; CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A1]], align 4 +; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] +; CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +; CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +; CHECK1: .omp.reduction.case2: +; CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[A1]], align 4 +; CHECK1-NEXT: [[TMP8:%.*]] = atomicrmw add i32* [[A]], i32 [[TMP7]] monotonic, align 4 +; CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +; CHECK1: .omp.reduction.default: +; CHECK1-NEXT: [[TMP9:%.*]] = bitcast i32* [[A1]] to i8* +; CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP9]]) +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6 +; CHECK2-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 +; CHECK2-NEXT: [[TMP:%.*]] = bitcast i32* [[A1]] to i8* +; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR0]] +; CHECK2-NEXT: store i32 1, i32* [[A1]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32** +; CHECK2-NEXT: store i32* [[A1]], i32** [[TMP1]], align 8 +; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK2-NEXT: switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +; CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +; CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +; CHECK2-NEXT: ] +; CHECK2: .omp.reduction.case1: +; CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A1]], align 4 +; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] +; CHECK2-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +; CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +; CHECK2: .omp.reduction.case2: +; CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[A1]], align 4 +; CHECK2-NEXT: [[TMP8:%.*]] = atomicrmw add i32* [[A]], i32 [[TMP7]] monotonic, align 4 +; CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +; CHECK2: .omp.reduction.default: +; CHECK2-NEXT: [[TMP9:%.*]] = bitcast i32* [[A1]] to i8* +; CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP9]]) +; CHECK2-NEXT: ret void entry: %a1 = alloca i32, align 4 %.omp.reduction.red_list = alloca [1 x i8*], align 8 @@ -393,7 +669,7 @@ define internal void @.omp.reduction.reduction_func(i8* %arg, i8* %arg1) { ; CHECK-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func -; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) [[ATTR9:#.*]] { +; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32** ; CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 @@ -405,6 +681,30 @@ ; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 ; CHECK-NEXT: ret void ; +; CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func +; CHECK1-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32** +; CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 +; CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[ARG]] to i32** +; CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +; CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +; CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] +; CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 +; CHECK1-NEXT: ret void +; CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func +; CHECK2-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32** +; CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 +; CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[ARG]] to i32** +; CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8 +; CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +; CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] +; CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP4]], align 4 +; CHECK2-NEXT: ret void entry: %tmp = bitcast i8* %arg1 to i32** %tmp2 = load i32*, i32** %tmp, align 8 diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll --- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll +++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -; RUN: opt -S -attributor -openmp-opt-cgscc -openmp-opt-enable-merging -enable-new-pm=0 < %s | FileCheck %s -; RUN: opt -S -aa-pipeline= -passes='attributor,cgscc(openmp-opt-cgscc)' -openmp-opt-enable-merging < %s | FileCheck %s +; RUN: opt -S -attributor -openmp-opt-cgscc -openmp-opt-enable-merging -enable-new-pm=0 < %s | FileCheck %s --check-prefix=CHECK1 +; RUN: opt -S -aa-pipeline= -passes='attributor,cgscc(openmp-opt-cgscc)' -openmp-opt-enable-merging < %s | FileCheck %s --check-prefix=CHECK2 ; #include ; void foo(); ; void use(int); @@ -792,10 +792,2935 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1:@.*]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (...) @foo() +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_float +; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 +; CHECK-NEXT: store float [[F]], float* [[F_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000 +; CHECK-NEXT: store float [[ADD]], float* [[P]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 +; CHECK-NEXT: call void @use(i32 [[CONV]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 +; CHECK-NEXT: call void @use(i32 [[CONV]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64 +; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32 +; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 +; CHECK-NEXT: store i32 [[TMP5]], i32* [[B]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP1]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions +; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18 +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19 +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq +; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 +; CHECK-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20 +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21 +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_3 +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_3_seq +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] +; CHECK: omp_region.end4: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split.split.split: +; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body5: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] +; CHECK: seq.par.merged2: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]] +; CHECK-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] +; CHECK: omp_region.body5.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: br label [[OMP_REGION_END4]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (...) @foo() +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (...) @foo() +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1 +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void (...) @foo() +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (...) @foo() +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_float +; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 +; CHECK-NEXT: store float [[F]], float* [[F_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000 +; CHECK-NEXT: store float [[ADD]], float* [[P]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 +; CHECK-NEXT: call void @use(i32 [[CONV]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 +; CHECK-NEXT: call void @use(i32 [[CONV]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64 +; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32 +; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 +; CHECK-NEXT: store i32 [[TMP5]], i32* [[B]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP1]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions +; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18 +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19 +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq +; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 +; CHECK-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20 +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21 +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_3 +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_3_seq +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] +; CHECK: omp_region.end4: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split.split.split: +; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body5: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] +; CHECK: seq.par.merged2: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]] +; CHECK-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] +; CHECK: omp_region.body5.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: br label [[OMP_REGION_END4]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (...) @foo() +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (...) @foo() +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1 +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void (...) @foo() +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (...) @foo() +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_float +; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 +; CHECK-NEXT: store float [[F]], float* [[F_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000 +; CHECK-NEXT: store float [[ADD]], float* [[P]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 +; CHECK-NEXT: call void @use(i32 [[CONV]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 +; CHECK-NEXT: call void @use(i32 [[CONV]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64 +; CHECK-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32 +; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 +; CHECK-NEXT: store i32 [[TMP5]], i32* [[B]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP1]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions +; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18 +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19 +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq +; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 +; CHECK-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20 +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21 +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_3 +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_3_seq +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK: omp_region.end: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split: +; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] +; CHECK: omp_region.end4: +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split.split.split: +; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK: omp_region.body5: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] +; CHECK: seq.par.merged2: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]] +; CHECK-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK: omp.par.merged.split.split.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] +; CHECK: omp_region.body5.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: br label [[OMP_REGION_END4]] +; CHECK: omp_region.body: +; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK: seq.par.merged: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK: omp.par.merged.split: +; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK: omp_region.body.split: +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: br label [[OMP_REGION_END]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (...) @foo() +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (...) @foo() +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1 +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK: omp.par.outlined.exit: +; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK: omp.par.exit.split: +; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK: entry.split.split: +; CHECK-NEXT: call void (...) @foo() +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: omp.par.entry: +; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK: omp.par.outlined.exit.exitStub: +; CHECK-NEXT: ret void +; CHECK: omp.par.region: +; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK: omp.par.merged: +; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK: omp.par.region.split: +; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK: omp.par.pre_finalize: +; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @use(i32 [[TMP0]]) +; CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@merge +; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK: omp_parallel: +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -803,10 +3728,8 @@ ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0:#.*]] { +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 @@ -818,10 +3741,10 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -829,116 +3752,92 @@ ; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined. -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@unmergable_proc_bind ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@unmergable_num_threads ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@unmergable_seq_call ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..7 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_seq ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -948,10 +3847,8 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0]] { +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 @@ -963,19 +3860,19 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -993,35 +3890,29 @@ ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..8 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..9 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float -; CHECK-SAME: (float [[F:%.*]], float* nocapture writeonly [[P:%.*]]) local_unnamed_addr { +; CHECK-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 ; CHECK-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1029,10 +3920,8 @@ ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_seq_float..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) [[ATTR0]] { +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 @@ -1044,19 +3933,19 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1074,38 +3963,32 @@ ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..10 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 ; CHECK-NEXT: call void @use(i32 [[CONV]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..11 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 ; CHECK-NEXT: call void @use(i32 [[CONV]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1115,10 +3998,8 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) [[ATTR0]] { +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 @@ -1130,16 +4011,16 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: ; CHECK-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 @@ -1163,35 +4044,29 @@ ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..12 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..13 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32 ; CHECK-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1199,10 +4074,8 @@ ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0]] { +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 @@ -1215,19 +4088,19 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1248,38 +4121,32 @@ ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..14 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..15 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: ; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1289,10 +4156,8 @@ ; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) [[ATTR0]] { +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 @@ -1304,19 +4169,19 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1335,28 +4200,22 @@ ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..16 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..17 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP1]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions ; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: @@ -1364,10 +4223,10 @@ ; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1375,10 +4234,8 @@ ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) [[ATTR0]] { +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 @@ -1390,10 +4247,10 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1401,36 +4258,30 @@ ; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..18 -; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..19 -; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq ; CHECK-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: @@ -1438,10 +4289,10 @@ ; CHECK-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1449,10 +4300,8 @@ ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) [[ATTR0]] { +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 @@ -1464,19 +4313,19 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1495,47 +4344,41 @@ ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..20 -; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..21 -; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull [[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) ; CHECK-NEXT: ret void ; CHECK: if.end: ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_3 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1543,10 +4386,8 @@ ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_3..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0]] { +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 @@ -1558,13 +4399,13 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) -; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1572,32 +4413,24 @@ ; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..22 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..23 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..24 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq ; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: @@ -1605,10 +4438,10 @@ ; CHECK-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1618,10 +4451,8 @@ ; CHECK-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_3_seq..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) [[ATTR0]] { +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 @@ -1633,31 +4464,31 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK: omp_region.end: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split: -; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK: omp_region.end4: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK: omp.par.merged.split.split.split.split: -; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1676,7 +4507,7 @@ ; CHECK: omp.par.merged.split.split.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK: omp_region.body5.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK-NEXT: br label [[OMP_REGION_END4]] ; CHECK: omp_region.body: ; CHECK-NEXT: br label [[SEQ_PAR_MERGED:%.*]] @@ -1688,155 +4519,123 @@ ; CHECK: omp.par.merged.split: ; CHECK-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK: omp_region.body.split: -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* [[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: br label [[OMP_REGION_END]] -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..25 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..26 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..27 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_seq_call ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..28 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..29 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..30 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_proc_bind ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 noundef 3) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..31 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..32 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..33 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@unmergable_3_num_threads ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..34 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..35 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..36 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1 ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) ; CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK: omp_parallel: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK: omp.par.outlined.exit: ; CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1844,12 +4643,10 @@ ; CHECK-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] ; CHECK: entry.split.split: ; CHECK-NEXT: call void (...) @foo() -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par -; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) [[ATTR0]] { +; CHECK-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: omp.par.entry: ; CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 @@ -1861,10 +4658,10 @@ ; CHECK: omp.par.region: ; CHECK-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK: omp.par.merged: -; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB1]]) -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK: entry.split: ; CHECK-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -1872,28 +4669,2242 @@ ; CHECK-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] ; CHECK: omp.par.pre_finalize: ; CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..37 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..38 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void -; -; ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..39 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void +; CHECK1-LABEL: define {{[^@]+}}@merge +; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK1: omp_parallel: +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK1: omp.par.outlined.exit: +; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK1: omp.par.exit.split: +; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK1: entry.split.split: +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge..omp_par +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK1: omp.par.outlined.exit.exitStub: +; CHECK1-NEXT: ret void +; CHECK1: omp.par.region: +; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK1: omp.par.merged: +; CHECK1-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK1: entry.split: +; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK1: omp.par.region.split: +; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK1: omp.par.pre_finalize: +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@unmergable_proc_bind +; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@unmergable_num_threads +; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@unmergable_seq_call +; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void (...) @foo() +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_seq +; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK1: omp_parallel: +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK1: omp.par.outlined.exit: +; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK1: omp.par.exit.split: +; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK1: entry.split.split: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_seq..omp_par +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK1: omp.par.outlined.exit.exitStub: +; CHECK1-NEXT: ret void +; CHECK1: omp.par.region: +; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK1: omp.par.merged: +; CHECK1-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1: omp_region.end: +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK1: omp.par.merged.split.split: +; CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK1: entry.split: +; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK1: omp.par.region.split: +; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK1: omp.par.pre_finalize: +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK1: omp_region.body: +; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK1: seq.par.merged: +; CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK1-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK1: omp.par.merged.split: +; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK1: omp_region.body.split: +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float +; CHECK1-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 +; CHECK1-NEXT: store float [[F]], float* [[F_ADDR]], align 4 +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK1: omp_parallel: +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]]) +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK1: omp.par.outlined.exit: +; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK1: omp.par.exit.split: +; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK1: entry.split.split: +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float..omp_par +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] { +; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK1: omp.par.outlined.exit.exitStub: +; CHECK1-NEXT: ret void +; CHECK1: omp.par.region: +; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK1: omp.par.merged: +; CHECK1-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1: omp_region.end: +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK1: omp.par.merged.split.split: +; CHECK1-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK1: entry.split: +; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK1: omp.par.region.split: +; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK1: omp.par.pre_finalize: +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK1: omp_region.body: +; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK1: seq.par.merged: +; CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4 +; CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000 +; CHECK1-NEXT: store float [[ADD]], float* [[P]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK1: omp.par.merged.split: +; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK1: omp_region.body.split: +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 +; CHECK1-NEXT: call void @use(i32 [[CONV]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 +; CHECK1-NEXT: call void @use(i32 [[CONV]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_seq_firstprivate +; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK1: omp_parallel: +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK1: omp.par.outlined.exit: +; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK1: omp.par.exit.split: +; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK1: entry.split.split: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK1: omp.par.outlined.exit.exitStub: +; CHECK1-NEXT: ret void +; CHECK1: omp.par.region: +; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK1: omp.par.merged: +; CHECK1-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1: omp_region.end: +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK1: omp.par.merged.split.split: +; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) +; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK1: entry.split: +; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK1: omp.par.region.split: +; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK1: omp.par.pre_finalize: +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK1: omp_region.body: +; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK1: seq.par.merged: +; CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK1-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64 +; CHECK1-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK1: omp.par.merged.split: +; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK1: omp_region.body.split: +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32 +; CHECK1-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_seq_sink_lt +; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK1: omp_parallel: +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK1: omp.par.outlined.exit: +; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK1: omp.par.exit.split: +; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK1: entry.split.split: +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK1: omp.par.outlined.exit.exitStub: +; CHECK1-NEXT: ret void +; CHECK1: omp.par.region: +; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK1: omp.par.merged: +; CHECK1-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1: omp_region.end: +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK1: omp.par.merged.split.split: +; CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK1: entry.split: +; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK1: omp.par.region.split: +; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK1: omp.par.pre_finalize: +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK1: omp_region.body: +; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK1: seq.par.merged: +; CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64 +; CHECK1-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 +; CHECK1-NEXT: store i32 [[TMP5]], i32* [[B]], align 4 +; CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK1: omp.par.merged.split: +; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK1: omp_region.body.split: +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use +; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK1: omp_parallel: +; CHECK1-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]]) +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK1: omp.par.outlined.exit: +; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK1: omp.par.exit.split: +; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK1: entry.split.split: +; CHECK1-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* +; CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] { +; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK1: omp.par.outlined.exit.exitStub: +; CHECK1-NEXT: ret void +; CHECK1: omp.par.region: +; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK1: omp.par.merged: +; CHECK1-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1: omp_region.end: +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK1: omp.par.merged.split.split: +; CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK1: entry.split: +; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK1: omp.par.region.split: +; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK1: omp.par.pre_finalize: +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK1: omp_region.body: +; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK1: seq.par.merged: +; CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +; CHECK1-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK1: omp.par.merged.split: +; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK1: omp_region.body.split: +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP1]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions +; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK1: omp_parallel: +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK1: omp.par.outlined.exit: +; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK1: omp.par.exit.split: +; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK1: entry.split.split: +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK1: omp.par.outlined.exit.exitStub: +; CHECK1-NEXT: ret void +; CHECK1: omp.par.region: +; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK1: omp.par.merged: +; CHECK1-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK1: entry.split: +; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK1: omp.par.region.split: +; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK1: omp.par.pre_finalize: +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18 +; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK1-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK1: if.then: +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK1-NEXT: ret void +; CHECK1: if.end: +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19 +; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK1-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK1: if.then: +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK1-NEXT: ret void +; CHECK1: if.end: +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq +; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK1: omp_parallel: +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK1: omp.par.outlined.exit: +; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK1: omp.par.exit.split: +; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK1: entry.split.split: +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK1: omp.par.outlined.exit.exitStub: +; CHECK1-NEXT: ret void +; CHECK1: omp.par.region: +; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK1: omp.par.merged: +; CHECK1-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1: omp_region.end: +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK1: omp.par.merged.split.split: +; CHECK1-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK1: entry.split: +; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK1: omp.par.region.split: +; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK1: omp.par.pre_finalize: +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK1: omp_region.body: +; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK1: seq.par.merged: +; CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4 +; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK1-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 +; CHECK1-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK1: omp.par.merged.split: +; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK1: omp_region.body.split: +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..20 +; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK1-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK1: if.then: +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK1-NEXT: ret void +; CHECK1: if.end: +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..21 +; CHECK1-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK1-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK1: if.then: +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK1-NEXT: ret void +; CHECK1: if.end: +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_3 +; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK1: omp_parallel: +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK1: omp.par.outlined.exit: +; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK1: omp.par.exit.split: +; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK1: entry.split.split: +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_3..omp_par +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK1: omp.par.outlined.exit.exitStub: +; CHECK1-NEXT: ret void +; CHECK1: omp.par.region: +; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK1: omp.par.merged: +; CHECK1-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK1: entry.split: +; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK1: omp.par.region.split: +; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK1: omp.par.pre_finalize: +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..22 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..24 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq +; CHECK1-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK1: omp_parallel: +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK1: omp.par.outlined.exit: +; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK1: omp.par.exit.split: +; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK1: entry.split.split: +; CHECK1-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK1-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq..omp_par +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK1: omp.par.outlined.exit.exitStub: +; CHECK1-NEXT: ret void +; CHECK1: omp.par.region: +; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK1: omp.par.merged: +; CHECK1-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1: omp_region.end: +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK1: omp.par.merged.split.split: +; CHECK1-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] +; CHECK1: omp_region.end4: +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK1: omp.par.merged.split.split.split.split: +; CHECK1-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK1: entry.split: +; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK1: omp.par.region.split: +; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK1: omp.par.pre_finalize: +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK1: omp_region.body5: +; CHECK1-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] +; CHECK1: seq.par.merged2: +; CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]] +; CHECK1-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK1: omp.par.merged.split.split.split: +; CHECK1-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] +; CHECK1: omp_region.body5.split: +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK1-NEXT: br label [[OMP_REGION_END4]] +; CHECK1: omp_region.body: +; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK1: seq.par.merged: +; CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +; CHECK1-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK1: omp.par.merged.split: +; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK1: omp_region.body.split: +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..25 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..26 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..27 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@unmergable_3_seq_call +; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void (...) @foo() +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void (...) @foo() +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..28 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..29 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..30 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@unmergable_3_proc_bind +; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..31 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..32 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..33 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@unmergable_3_num_threads +; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..34 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..35 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..36 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_2_unmergable_1 +; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK1: omp_parallel: +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK1: omp.par.outlined.exit: +; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK1: omp.par.exit.split: +; CHECK1-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK1: entry.split.split: +; CHECK1-NEXT: call void (...) @foo() +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK1: omp.par.outlined.exit.exitStub: +; CHECK1-NEXT: ret void +; CHECK1: omp.par.region: +; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK1: omp.par.merged: +; CHECK1-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK1: entry.split: +; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK1: omp.par.region.split: +; CHECK1-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK1: omp.par.pre_finalize: +; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..37 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..38 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..39 +; CHECK1-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK1-NEXT: call void @use(i32 [[TMP0]]) +; CHECK1-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge +; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK2: omp_parallel: +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2: omp.par.outlined.exit: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK2: omp.par.exit.split: +; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK2: entry.split.split: +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge..omp_par +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK2: omp.par.outlined.exit.exitStub: +; CHECK2-NEXT: ret void +; CHECK2: omp.par.region: +; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK2: omp.par.merged: +; CHECK2-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK2: entry.split: +; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK2: omp.par.region.split: +; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK2: omp.par.pre_finalize: +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@unmergable_proc_bind +; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@unmergable_num_threads +; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@unmergable_seq_call +; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (...) @foo() +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_seq +; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK2: omp_parallel: +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2: omp.par.outlined.exit: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK2: omp.par.exit.split: +; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK2: entry.split.split: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_seq..omp_par +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK2: omp.par.outlined.exit.exitStub: +; CHECK2-NEXT: ret void +; CHECK2: omp.par.region: +; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK2: omp.par.merged: +; CHECK2-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2: omp_region.end: +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK2: omp.par.merged.split.split: +; CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK2: entry.split: +; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK2: omp.par.region.split: +; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK2: omp.par.pre_finalize: +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2: omp_region.body: +; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK2: seq.par.merged: +; CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK2: omp.par.merged.split: +; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK2: omp_region.body.split: +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float +; CHECK2-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 +; CHECK2-NEXT: store float [[F]], float* [[F_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK2: omp_parallel: +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]]) +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2: omp.par.outlined.exit: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK2: omp.par.exit.split: +; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK2: entry.split.split: +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float..omp_par +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] { +; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK2: omp.par.outlined.exit.exitStub: +; CHECK2-NEXT: ret void +; CHECK2: omp.par.region: +; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK2: omp.par.merged: +; CHECK2-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2: omp_region.end: +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK2: omp.par.merged.split.split: +; CHECK2-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK2: entry.split: +; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK2: omp.par.region.split: +; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK2: omp.par.pre_finalize: +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2: omp_region.body: +; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK2: seq.par.merged: +; CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4 +; CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000 +; CHECK2-NEXT: store float [[ADD]], float* [[P]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK2: omp.par.merged.split: +; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK2: omp_region.body.split: +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK2-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 +; CHECK2-NEXT: call void @use(i32 [[CONV]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load float, float* [[F]], align 4 +; CHECK2-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 +; CHECK2-NEXT: call void @use(i32 [[CONV]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate +; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK2: omp_parallel: +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2: omp.par.outlined.exit: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK2: omp.par.exit.split: +; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK2: entry.split.split: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK2: omp.par.outlined.exit.exitStub: +; CHECK2-NEXT: ret void +; CHECK2: omp.par.region: +; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK2: omp.par.merged: +; CHECK2-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2: omp_region.end: +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK2: omp.par.merged.split.split: +; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) +; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK2: entry.split: +; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK2: omp.par.region.split: +; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK2: omp.par.pre_finalize: +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2: omp_region.body: +; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK2: seq.par.merged: +; CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64 +; CHECK2-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK2: omp.par.merged.split: +; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK2: omp_region.body.split: +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..12 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i64 [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A]] to i32 +; CHECK2-NEXT: call void @use(i32 [[A_ADDR_SROA_0_0_EXTRACT_TRUNC]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt +; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK2: omp_parallel: +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2: omp.par.outlined.exit: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK2: omp.par.exit.split: +; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK2: entry.split.split: +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK2: omp.par.outlined.exit.exitStub: +; CHECK2-NEXT: ret void +; CHECK2: omp.par.region: +; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK2: omp.par.merged: +; CHECK2-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2: omp_region.end: +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK2: omp.par.merged.split.split: +; CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK2: entry.split: +; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK2: omp.par.region.split: +; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK2: omp.par.pre_finalize: +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2: omp_region.body: +; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK2: seq.par.merged: +; CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK2-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64 +; CHECK2-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 +; CHECK2-NEXT: store i32 [[TMP5]], i32* [[B]], align 4 +; CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK2: omp.par.merged.split: +; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK2: omp_region.body.split: +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..14 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..15 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use +; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK2: omp_parallel: +; CHECK2-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]]) +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2: omp.par.outlined.exit: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK2: omp.par.exit.split: +; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK2: entry.split.split: +; CHECK2-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B]] to i8* +; CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[LT_CAST]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] { +; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK2: omp.par.outlined.exit.exitStub: +; CHECK2-NEXT: ret void +; CHECK2: omp.par.region: +; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK2: omp.par.merged: +; CHECK2-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2: omp_region.end: +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK2: omp.par.merged.split.split: +; CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK2: entry.split: +; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK2: omp.par.region.split: +; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK2: omp.par.pre_finalize: +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2: omp_region.body: +; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK2: seq.par.merged: +; CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* +; CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +; CHECK2-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK2: omp.par.merged.split: +; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK2: omp_region.body.split: +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..16 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..17 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP1]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions +; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK2: omp_parallel: +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2: omp.par.outlined.exit: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK2: omp.par.exit.split: +; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK2: entry.split.split: +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK2: omp.par.outlined.exit.exitStub: +; CHECK2-NEXT: ret void +; CHECK2: omp.par.region: +; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK2: omp.par.merged: +; CHECK2-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK2: entry.split: +; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK2: omp.par.region.split: +; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK2: omp.par.pre_finalize: +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..18 +; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK2: if.then: +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK2-NEXT: ret void +; CHECK2: if.end: +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..19 +; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK2: if.then: +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK2-NEXT: ret void +; CHECK2: if.end: +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq +; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK2: omp_parallel: +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2: omp.par.outlined.exit: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK2: omp.par.exit.split: +; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK2: entry.split.split: +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK2: omp.par.outlined.exit.exitStub: +; CHECK2-NEXT: ret void +; CHECK2: omp.par.region: +; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK2: omp.par.merged: +; CHECK2-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2: omp_region.end: +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK2: omp.par.merged.split.split: +; CHECK2-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK2: entry.split: +; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK2: omp.par.region.split: +; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK2: omp.par.pre_finalize: +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2: omp_region.body: +; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK2: seq.par.merged: +; CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4 +; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK2-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 +; CHECK2-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK2: omp.par.merged.split: +; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK2: omp_region.body.split: +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..20 +; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL1]], align 4 +; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK2: if.then: +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK2-NEXT: ret void +; CHECK2: if.end: +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..21 +; CHECK2-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CANCEL2]], align 4 +; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK2-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK2: if.then: +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* noundef nonnull @[[GLOB1]], i32 [[TMP1]], i32 noundef 1) +; CHECK2-NEXT: ret void +; CHECK2: if.end: +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_3 +; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK2: omp_parallel: +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2: omp.par.outlined.exit: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK2: omp.par.exit.split: +; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK2: entry.split.split: +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_3..omp_par +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK2: omp.par.outlined.exit.exitStub: +; CHECK2-NEXT: ret void +; CHECK2: omp.par.region: +; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK2: omp.par.merged: +; CHECK2-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK2: entry.split: +; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK2: omp.par.region.split: +; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK2: omp.par.pre_finalize: +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..22 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..23 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..24 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq +; CHECK2-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK2: omp_parallel: +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2: omp.par.outlined.exit: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK2: omp.par.exit.split: +; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK2: entry.split.split: +; CHECK2-NEXT: [[ADD1_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK2-NEXT: call void @use(i32 [[ADD1_SEQ_OUTPUT_LOAD]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq..omp_par +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK2: omp.par.outlined.exit.exitStub: +; CHECK2-NEXT: ret void +; CHECK2: omp.par.region: +; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK2: omp.par.merged: +; CHECK2-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2: omp_region.end: +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] +; CHECK2: omp.par.merged.split.split: +; CHECK2-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] +; CHECK2: omp_region.end4: +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK2: omp.par.merged.split.split.split.split: +; CHECK2-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK2: entry.split: +; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK2: omp.par.region.split: +; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK2: omp.par.pre_finalize: +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; CHECK2: omp_region.body5: +; CHECK2-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] +; CHECK2: seq.par.merged2: +; CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]] +; CHECK2-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] +; CHECK2: omp.par.merged.split.split.split: +; CHECK2-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] +; CHECK2: omp_region.body5.split: +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK2-NEXT: br label [[OMP_REGION_END4]] +; CHECK2: omp_region.body: +; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] +; CHECK2: seq.par.merged: +; CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +; CHECK2-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] +; CHECK2: omp.par.merged.split: +; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] +; CHECK2: omp_region.body.split: +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: br label [[OMP_REGION_END]] +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..25 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..26 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..27 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@unmergable_3_seq_call +; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..28 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (...) @foo() +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (...) @foo() +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..28 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..29 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..30 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@unmergable_3_proc_bind +; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..32 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..33 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..31 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..32 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..33 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@unmergable_3_num_threads +; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..34 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..35 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..36 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1 +; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] +; CHECK2: omp_parallel: +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] +; CHECK2: omp.par.outlined.exit: +; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] +; CHECK2: omp.par.exit.split: +; CHECK2-NEXT: br label [[ENTRY_SPLIT_SPLIT:%.*]] +; CHECK2: entry.split.split: +; CHECK2-NEXT: call void (...) @foo() +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] +; CHECK2: omp.par.outlined.exit.exitStub: +; CHECK2-NEXT: ret void +; CHECK2: omp.par.region: +; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] +; CHECK2: omp.par.merged: +; CHECK2-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] +; CHECK2: entry.split: +; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] +; CHECK2: omp.par.region.split: +; CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] +; CHECK2: omp.par.pre_finalize: +; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..37 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..38 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void +; +; +; CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..39 +; CHECK2-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) { +; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK2-NEXT: call void @use(i32 [[TMP0]]) +; CHECK2-NEXT: ret void ;