diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp --- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -31,7 +31,7 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -41,7 +41,7 @@ // CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp --- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -21,28 +21,28 @@ // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 void foo() { -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target teams distribute parallel for simd if(a) @@ -67,28 +67,28 @@ for (int i = 0; i < 10; ++i) ; int a; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: 
call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target teams distribute parallel for lastprivate(a) @@ -112,25 +112,25 @@ #pragma omp target teams distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] // CHECK: call i32 @__kmpc_target_init( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target teams @@ -172,28 +172,28 @@ #pragma omp distribute parallel for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 
@__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target teams @@ -224,28 +224,28 @@ #pragma omp distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[DISTR_LIGHT]] // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target @@ -283,22 +283,22 @@ #pragma omp distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: 
[[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] #pragma omp target parallel for if(a) for (int i = 0; i < 10; ++i) @@ -321,28 +321,28 @@ #pragma omp target parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] // CHECK-DAG: [[BAR_LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] // CHECK-DAG: [[BAR_LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] // CHECK-DAG: [[BAR_LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] #pragma omp target parallel if(a) @@ -373,27 +373,27 @@ #pragma omp for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] // CHECK-DAG: [[BAR_LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] // CHECK-DAG: [[BAR_LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] // CHECK-DAG: 
[[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] #pragma omp target @@ -431,22 +431,22 @@ #pragma omp for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-DAG: [[FOR_LIGHT]] // CHECK-DAG: [[LIGHT]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) // CHECK-DAG: [[FULL]] #pragma omp target #pragma omp parallel for diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -393,7 +393,7 @@ // CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 // CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [2 x i8*], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -420,7 +420,7 @@ // CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP9]], i64 2) // CHECK-NEXT: call void @__kmpc_free_shared(i8* [[B]], i64 4) // CHECK-NEXT: call void @__kmpc_free_shared(i8* [[A]], i64 4) -// CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -2995,7 +2995,7 @@ // CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** 
[[A_ADDR]], align 8 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -3006,7 +3006,7 @@ // CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i64 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR1:[0-9]+]] -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -3328,7 +3328,7 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK5: user_code.entry: @@ -3338,7 +3338,7 @@ // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR1:[0-9]+]] -// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK5-NEXT: ret void // CHECK5: worker.exit: // CHECK5-NEXT: ret void @@ -3651,7 +3651,7 @@ // CHECK6-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK6-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK6-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK6-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK6: user_code.entry: @@ -3661,7 +3661,7 @@ // CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* 
[[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR1:[0-9]+]] -// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK6-NEXT: ret void // CHECK6: worker.exit: // CHECK6-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp --- a/clang/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_force_full_runtime_SPMD_codegen.cpp @@ -11,13 +11,13 @@ // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 void foo() { -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) #pragma omp target teams distribute parallel for simd for (int i = 0; i < 10; ++i) ; @@ -40,13 +40,13 @@ for (int i = 0; i < 10; ++i) ; int a; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) #pragma omp target teams distribute parallel for lastprivate(a) for (int i = 0; i < 10; ++i) a = i; @@ -68,13 +68,13 @@ #pragma omp target teams distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, 
i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) #pragma omp target teams #pragma omp distribute parallel for simd for (int i = 0; i < 10; ++i) @@ -103,13 +103,13 @@ #pragma omp distribute parallel for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) #pragma omp target teams #pragma omp distribute parallel for for (int i = 0; i < 10; ++i) @@ -138,13 +138,13 @@ #pragma omp distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) #pragma omp target #pragma omp teams #pragma omp distribute parallel for @@ -180,13 +180,13 @@ #pragma omp distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 
false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) #pragma omp target parallel for for (int i = 0; i < 10; ++i) ; @@ -208,13 +208,13 @@ #pragma omp target parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) #pragma omp target parallel #pragma omp for simd for (int i = 0; i < 10; ++i) @@ -243,13 +243,13 @@ #pragma omp for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) #pragma omp target #pragma omp parallel #pragma omp for simd ordered @@ -285,13 +285,13 @@ #pragma omp for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, 
i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) #pragma omp target #pragma omp parallel for for (int i = 0; i < 10; ++i) diff --git a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp @@ -29,14 +29,14 @@ // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) // CHECK1-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP2]], i64 0) -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -66,12 +66,12 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: call void @_Z3usev() #[[ATTR6]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -107,14 +107,14 @@ // CHECK2-SAME: () #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK2-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) // CHECK2-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -144,12 +144,12 @@ // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 // CHECK2-SAME: () #[[ATTR1]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: call void @_Z3usev() #[[ATTR6]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -185,14 +185,14 @@ // CHECK3-SAME: () #[[ATTR0:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) // CHECK3-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP2]], i32 0) -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -222,12 +222,12 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: call void @_Z3usev() #[[ATTR6]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp @@ -40,7 +40,7 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -52,7 +52,7 @@ // CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP5]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -173,7 +173,7 @@ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK2-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -185,7 +185,7 @@ // CHECK2-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP5]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -306,7 +306,7 @@ // CHECK3-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -318,7 +318,7 @@ // CHECK3-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP5]], i32 1) -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -1446,7 +1446,7 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x i8*], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -1460,7 +1460,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1563,7 +1563,7 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -1585,7 +1585,7 @@ // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1628,7 +1628,7 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -1646,7 +1646,7 @@ // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[A_ON_STACK]], align 4 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i64 4) -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1721,7 +1721,7 @@ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 4 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x i8*], align 4 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1735,7 +1735,7 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1836,7 +1836,7 @@ // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1858,7 +1858,7 @@ // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK2-NEXT: store i32 [[ADD4]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // 
CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1900,7 +1900,7 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1918,7 +1918,7 @@ // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK2-NEXT: store i32 [[INC]], i32* [[A_ON_STACK]], align 4 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i32 4) -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -465,7 +465,7 @@ // CHECK-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -487,7 +487,7 @@ // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4 // CHECK-NEXT: call void @__kmpc_free_shared(i8* [[D]], i64 4) -// CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void @@ -584,7 +584,7 @@ // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper -// CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -601,6 +601,6 @@ // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 // CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** // CHECK-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 -// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP5]], i32* [[TMP8]]) #[[ATTR1:[0-9]+]] +// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP5]], i32* [[TMP8]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp --- 
a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -153,7 +153,7 @@ // CHECK1-NEXT: store i32* [[PTR1]], i32** [[PTR1_ADDR]], align 8 // CHECK1-NEXT: store i32** [[PTR2]], i32*** [[PTR2_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[PTR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -166,7 +166,7 @@ // CHECK1-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32**, i32**)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP7]], i64 2) -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -195,11 +195,11 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -211,7 +211,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -225,7 +225,7 @@ // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 // CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 8 -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -261,7 +261,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, 
%struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -305,7 +305,7 @@ // CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[CALL]], align 8 // CHECK1-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP17]], 1 // CHECK1-NEXT: store i64 [[ADD22]], i64* [[CALL]], align 8 -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -338,7 +338,7 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -359,7 +359,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK1-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -383,7 +383,7 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -406,7 +406,7 @@ // CHECK1-NEXT: [[CONV8:%.*]] = fptosi double [[TMP8]] to i32 // CHECK1-NEXT: [[A9:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV8]], double* nonnull align 8 dereferenceable(8) [[A9]]) #[[ATTR6]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -439,7 +439,7 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, 
i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -448,7 +448,7 @@ // CHECK1: worker.exit: // CHECK1-NEXT: ret void // CHECK1: 1: -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // // @@ -464,7 +464,7 @@ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -480,7 +480,7 @@ // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK1-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -540,7 +540,7 @@ // CHECK2-NEXT: store i32* [[PTR1]], i32** [[PTR1_ADDR]], align 4 // CHECK2-NEXT: store i32** [[PTR2]], i32*** [[PTR2_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[PTR2_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -553,7 +553,7 @@ // CHECK2-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32**, i32**)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP7]], i32 2) -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -582,11 +582,11 @@ // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 // CHECK2-SAME: () #[[ATTR1]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 
[[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -598,7 +598,7 @@ // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -612,7 +612,7 @@ // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2 // CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 // CHECK2-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -647,7 +647,7 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -691,7 +691,7 @@ // CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[CALL]], align 8 // CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 // CHECK2-NEXT: store i64 [[ADD21]], i64* [[CALL]], align 8 -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -723,7 +723,7 @@ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -744,7 +744,7 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK2-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// 
CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -767,7 +767,7 @@ // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -790,7 +790,7 @@ // CHECK2-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 // CHECK2-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 // CHECK2-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], double* nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR6]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -823,7 +823,7 @@ // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 // CHECK2-SAME: () #[[ATTR1]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -832,7 +832,7 @@ // CHECK2: worker.exit: // CHECK2-NEXT: ret void // CHECK2: 1: -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // // @@ -847,7 +847,7 @@ // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -863,7 +863,7 @@ // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK2-NEXT: store i32 [[ADD4]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -923,7 +923,7 @@ // CHECK3-NEXT: store i32* [[PTR1]], i32** [[PTR1_ADDR]], align 4 // CHECK3-NEXT: store i32** [[PTR2]], 
i32*** [[PTR2_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[PTR2_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -936,7 +936,7 @@ // CHECK3-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32**, i32**)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP7]], i32 2) -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -965,11 +965,11 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -981,7 +981,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -995,7 +995,7 @@ // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 // CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1030,7 +1030,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 
@__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -1074,7 +1074,7 @@ // CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[CALL]], align 8 // CHECK3-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 // CHECK3-NEXT: store i64 [[ADD21]], i64* [[CALL]], align 8 -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1106,7 +1106,7 @@ // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -1127,7 +1127,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1150,7 +1150,7 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -1173,7 +1173,7 @@ // CHECK3-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 // CHECK3-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], double* nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR6]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1206,7 +1206,7 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq 
i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -1215,7 +1215,7 @@ // CHECK3: worker.exit: // CHECK3-NEXT: ret void // CHECK3: 1: -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // // @@ -1230,7 +1230,7 @@ // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -1246,7 +1246,7 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp @@ -58,7 +58,7 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -68,7 +68,7 @@ // CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -105,7 +105,7 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* 
@[[GLOB1]], i8 2, i1 false, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -121,7 +121,7 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP11]], i64 3) -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -165,7 +165,7 @@ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -175,7 +175,7 @@ // CHECK2-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -212,7 +212,7 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -228,7 +228,7 @@ // CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP11]], i32 3) -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -272,7 +272,7 @@ // CHECK3-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -282,7 +282,7 @@ // CHECK3-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -319,7 +319,7 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -335,7 +335,7 @@ // CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP11]], i32 3) -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -379,7 +379,7 @@ // CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 // CHECK4-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 // CHECK4-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -389,7 +389,7 @@ // CHECK4-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 // CHECK4-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* 
@__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -426,7 +426,7 @@ // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK4-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -442,7 +442,7 @@ // CHECK4-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 // CHECK4-NEXT: [[TMP11:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP11]], i64 3) -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -486,7 +486,7 @@ // CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK5-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK5: user_code.entry: @@ -496,7 +496,7 @@ // CHECK5-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 // CHECK5-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) -// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK5-NEXT: ret void // CHECK5: worker.exit: // CHECK5-NEXT: ret void @@ -533,7 +533,7 @@ // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], 
label [[WORKER_EXIT:%.*]] // CHECK5: user_code.entry: @@ -549,7 +549,7 @@ // CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP11]], i32 3) -// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK5-NEXT: ret void // CHECK5: worker.exit: // CHECK5-NEXT: ret void @@ -593,7 +593,7 @@ // CHECK6-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK6-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK6-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK6-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK6-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK6: user_code.entry: @@ -603,7 +603,7 @@ // CHECK6-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 // CHECK6-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) -// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK6-NEXT: ret void // CHECK6: worker.exit: // CHECK6-NEXT: ret void @@ -640,7 +640,7 @@ // CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK6-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK6-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK6-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK6: user_code.entry: @@ -656,7 +656,7 @@ // CHECK6-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP11]], i32 3) -// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK6-NEXT: ret void // CHECK6: worker.exit: // CHECK6-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ 
b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -53,7 +53,7 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -63,7 +63,7 @@ // CHECK1-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -103,7 +103,7 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -120,7 +120,7 @@ // CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i64 3) -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -164,7 +164,7 @@ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK2-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -174,7 +174,7 @@ // CHECK2-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK2-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -213,7 +213,7 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -230,7 +230,7 @@ // CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 // CHECK2-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i32 3) -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -274,7 +274,7 @@ // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK3-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -284,7 +284,7 @@ // CHECK3-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -323,7 +323,7 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK3-NEXT: 
[[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -340,7 +340,7 @@ // CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i32 3) -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -384,7 +384,7 @@ // CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 // CHECK4-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 8 // CHECK4-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -394,7 +394,7 @@ // CHECK4-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 // CHECK4-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -434,7 +434,7 @@ // CHECK4-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -451,7 +451,7 @@ // CHECK4-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 // CHECK4-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i64 3) -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -495,7 +495,7 @@ // CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 
// CHECK5-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK5: user_code.entry: @@ -505,7 +505,7 @@ // CHECK5-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 // CHECK5-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) -// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK5-NEXT: ret void // CHECK5: worker.exit: // CHECK5-NEXT: ret void @@ -544,7 +544,7 @@ // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK5: user_code.entry: @@ -561,7 +561,7 @@ // CHECK5-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 // CHECK5-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i32 3) -// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK5-NEXT: ret void // CHECK5: worker.exit: // CHECK5-NEXT: ret void @@ -605,7 +605,7 @@ // CHECK6-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 // CHECK6-NEXT: store i16* [[AA]], i16** [[AA_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +// CHECK6-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK6-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK6-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK6: user_code.entry: @@ -615,7 +615,7 @@ // CHECK6-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 // CHECK6-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i16*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], 
i32 1) -// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK6-NEXT: ret void // CHECK6: worker.exit: // CHECK6-NEXT: ret void @@ -654,7 +654,7 @@ // CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK6-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK6-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK6-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK6: user_code.entry: @@ -671,7 +671,7 @@ // CHECK6-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 // CHECK6-NEXT: [[TMP12:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP12]], i32 3) -// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK6-NEXT: ret void // CHECK6: worker.exit: // CHECK6-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -54,8 +54,8 @@ // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}( // -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i1 true, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) // // // define internal void [[PFN]]( @@ -233,8 +233,8 @@ // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}( // -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i1 true, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) // // // define internal void [[PFN1]]( @@ -490,8 +490,8 @@ // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}( // -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 true) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i1 true, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) // // // define internal void [[PFN2]]( diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -36,14 +36,14 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] 
= alloca i32, align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -603,14 +603,14 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1270,14 +1270,14 @@ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1837,14 +1837,14 @@ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = 
alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2504,14 +2504,14 @@ // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3071,14 +3071,14 @@ // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void diff --git 
a/clang/test/OpenMP/nvptx_target_printf_codegen.c b/clang/test/OpenMP/nvptx_target_printf_codegen.c --- a/clang/test/OpenMP/nvptx_target_printf_codegen.c +++ b/clang/test/OpenMP/nvptx_target_printf_codegen.c @@ -48,7 +48,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[FMT:%.*]] = alloca i8*, align 8 // CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -62,7 +62,7 @@ // CHECK-64-NEXT: store double 3.000000e+00, double* [[TMP4]], align 8 // CHECK-64-NEXT: [[TMP5:%.*]] = bitcast %printf_args* [[TMP]] to i8* // CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @vprintf(i8* [[TMP1]], i8* [[TMP5]]) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -71,12 +71,12 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @vprintf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str1, i64 0, i64 0), i8* null) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -89,7 +89,7 @@ // CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8 // CHECK-64-NEXT: store i64 [[FOO]], i64* [[FOO_ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[FOO_ADDR]] to i32* -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -105,7 +105,7 @@ // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // CHECK-64: if.end: -// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK-64-NEXT: ret void // // @@ -117,7 +117,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[FMT:%.*]] = alloca i8*, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -131,7 +131,7 @@ // CHECK-32-NEXT: store double 3.000000e+00, double* [[TMP4]], align 8 // CHECK-32-NEXT: [[TMP5:%.*]] = bitcast %printf_args* [[TMP]] to i8* // CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @vprintf(i8* [[TMP1]], i8* [[TMP5]]) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -140,12 +140,12 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @vprintf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str1, i32 0, i32 0), i8* null) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -157,7 +157,7 @@ // CHECK-32-NEXT: [[FOO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8 // CHECK-32-NEXT: store i32 [[FOO]], i32* [[FOO_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -173,6 +173,6 @@ // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // CHECK-32: if.end: -// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK-32-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp @@ -61,32 +61,32 @@ } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l32}}( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_target_deinit({{.*}}, i1 true, i1 false) +// CHECK: call void 
@__kmpc_target_deinit({{.*}}, i8 2, i1 false) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l37}}( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_target_deinit({{.*}}, i1 true, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l42}}( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_target_deinit({{.*}}, i1 true, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l47}}( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini // CHECK-NOT: call void @__kmpc_nvptx_end_reduce_nowait( -// CHECK: call void @__kmpc_target_deinit({{.*}}, i1 true, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false) // CHECK: ret void #endif diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -59,7 +59,7 @@ // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -70,7 +70,7 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -100,7 +100,7 @@ // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: 
user_code.entry: @@ -111,7 +111,7 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -141,7 +141,7 @@ // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -152,7 +152,7 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -224,7 +224,7 @@ // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -235,7 +235,7 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -265,7 +265,7 @@ // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: 
user_code.entry: @@ -276,7 +276,7 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -306,7 +306,7 @@ // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -317,7 +317,7 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -389,7 +389,7 @@ // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8* -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -400,7 +400,7 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -430,7 +430,7 @@ // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: 
user_code.entry: @@ -441,7 +441,7 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -471,7 +471,7 @@ // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -482,7 +482,7 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -580,14 +580,14 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -692,7 +692,7 @@ // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK1-NEXT: 
call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -702,14 +702,14 @@ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -814,7 +814,7 @@ // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -824,14 +824,14 @@ // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -936,6 +936,6 @@ // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], 
align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] // CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -18484,7 +18484,7 @@ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -18498,8 +18498,8 @@ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR1:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -18795,7 +18795,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 -// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 @@ -18807,7 +18807,7 @@ // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -18817,8 +18817,8 @@ // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(i32* 
[[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -19062,14 +19062,14 @@ // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -19253,7 +19253,7 @@ // CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -19263,8 +19263,8 @@ // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -19486,7 +19486,7 @@ // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x 
i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -19496,8 +19496,8 @@ // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK1-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -19805,7 +19805,7 @@ // CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -19816,8 +19816,8 @@ // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK1-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -20078,7 +20078,7 @@ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -20092,8 +20092,8 @@ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, 
i64* [[L_CASTED]], align 8 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR1:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -20389,7 +20389,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 -// CHECK2-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 @@ -20401,7 +20401,7 @@ // CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -20411,8 +20411,8 @@ // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -20656,14 +20656,14 @@ // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(i32* 
[[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -20847,7 +20847,7 @@ // CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -20857,8 +20857,8 @@ // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -21080,7 +21080,7 @@ // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -21090,8 +21090,8 @@ // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK2-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -21394,7 +21394,7 @@ // CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* 
[[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -21405,8 +21405,8 @@ // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 8 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK2-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -21665,7 +21665,7 @@ // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -21677,8 +21677,8 @@ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR1:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -21961,7 +21961,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 -// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 @@ -21972,7 +21972,7 @@ // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 // CHECK3-NEXT: 
[[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -21981,8 +21981,8 @@ // CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -22217,14 +22217,14 @@ // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -22401,7 +22401,7 @@ // CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -22410,8 +22410,8 @@ // CHECK3-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* 
[[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -22622,7 +22622,7 @@ // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -22631,8 +22631,8 @@ // CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK3-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -22939,7 +22939,7 @@ // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -22949,8 +22949,8 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK3-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: 
worker.exit: // CHECK3-NEXT: ret void @@ -23199,7 +23199,7 @@ // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -23211,8 +23211,8 @@ // CHECK4-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR1:[0-9]+]] -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -23495,7 +23495,7 @@ // // // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 -// CHECK4-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 @@ -23506,7 +23506,7 @@ // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -23515,8 +23515,8 @@ // CHECK4-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK4-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -23751,14 +23751,14 @@ // CHECK4-NEXT: 
store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK4-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -23935,7 +23935,7 @@ // CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK4-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -23944,8 +23944,8 @@ // CHECK4-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR1]] -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK4-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -24156,7 +24156,7 @@ // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -24165,8 +24165,8 @@ // CHECK4-NEXT: store i32 
[[TMP3]], i32* [[N_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR1]] -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK4-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -24473,7 +24473,7 @@ // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK4-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -24483,8 +24483,8 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR1]] -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK4-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]] +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -46,7 +46,7 @@ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -61,7 +61,7 @@ // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], 
i64 [[TMP4]], i32* [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -304,10 +304,10 @@ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I7]]) #[[ATTR4:[0-9]+]] -// CHECK1-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I7]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR5]] // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CALL]], [[CALL13]] -// CHECK1-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] +// CHECK1-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR5]] // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD14]], [[CALL15]] // CHECK1-NEXT: store i32 [[ADD16]], i32* [[TMP0]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -354,7 +354,7 @@ // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -367,7 +367,7 @@ // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -598,10 +598,10 @@ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK2-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR4:[0-9]+]] -// CHECK2-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK2-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR5:[0-9]+]] +// CHECK2-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR5]] // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[CALL]], [[CALL8]] -// CHECK2-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR4]] +// CHECK2-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR5]] // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CALL10]] // CHECK2-NEXT: store i32 [[ADD11]], i32* [[TMP0]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -648,7 +648,7 @@ // CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* 
[[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -661,7 +661,7 @@ // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -892,10 +892,10 @@ // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR4:[0-9]+]] -// CHECK3-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK3-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR5:[0-9]+]] +// CHECK3-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR5]] // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[CALL]], [[CALL8]] -// CHECK3-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR4]] +// CHECK3-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR5]] // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CALL10]] // CHECK3-NEXT: store i32 [[ADD11]], i32* [[TMP0]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -944,7 +944,7 @@ // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -959,7 +959,7 @@ // CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], i32* [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -1202,10 +1202,10 @@ // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK4-NEXT: [[CALL:%.*]] = call i32 
@_Z3fooPi(i32* [[I7]]) #[[ATTR4:[0-9]+]] -// CHECK4-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I7]]) #[[ATTR5:[0-9]+]] +// CHECK4-NEXT: [[CALL13:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR5]] // CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[CALL]], [[CALL13]] -// CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR4]] +// CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[CONV]]) #[[ATTR5]] // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD14]], [[CALL15]] // CHECK4-NEXT: store i32 [[ADD16]], i32* [[TMP0]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -1252,7 +1252,7 @@ // CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK5: user_code.entry: @@ -1265,7 +1265,7 @@ // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK5-NEXT: ret void // CHECK5: worker.exit: // CHECK5-NEXT: ret void @@ -1496,10 +1496,10 @@ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK5-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR4:[0-9]+]] -// CHECK5-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK5-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR5:[0-9]+]] +// CHECK5-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR5]] // CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[CALL]], [[CALL8]] -// CHECK5-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR4]] +// CHECK5-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR5]] // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CALL10]] // CHECK5-NEXT: store i32 [[ADD11]], i32* [[TMP0]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -1546,7 +1546,7 @@ // CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK6-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK6-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK6-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK6-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK6: user_code.entry: 
@@ -1559,7 +1559,7 @@ // CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK6-NEXT: ret void // CHECK6: worker.exit: // CHECK6-NEXT: ret void @@ -1790,10 +1790,10 @@ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK6-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR4:[0-9]+]] -// CHECK6-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR4]] +// CHECK6-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I4]]) #[[ATTR5:[0-9]+]] +// CHECK6-NEXT: [[CALL8:%.*]] = call i32 @_Z3fooPi(i32* [[TMP0]]) #[[ATTR5]] // CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[CALL]], [[CALL8]] -// CHECK6-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR4]] +// CHECK6-NEXT: [[CALL10:%.*]] = call i32 @_Z3fooPi(i32* [[ARGC_ADDR]]) #[[ATTR5]] // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CALL10]] // CHECK6-NEXT: store i32 [[ADD11]], i32* [[TMP0]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -9397,7 +9397,7 @@ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -9411,8 +9411,8 @@ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR1:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -9732,7 +9732,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32 -// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 
dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 @@ -9744,7 +9744,7 @@ // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -9754,8 +9754,8 @@ // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -10023,14 +10023,14 @@ // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -10228,7 +10228,7 @@ // CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 
@__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -10238,8 +10238,8 @@ // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -10479,7 +10479,7 @@ // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -10491,8 +10491,8 @@ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR1:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -10799,7 +10799,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32 -// CHECK2-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 @@ -10810,7 +10810,7 @@ // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 
false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -10819,8 +10819,8 @@ // CHECK2-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -11079,14 +11079,14 @@ // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -11277,7 +11277,7 @@ // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK2-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -11286,8 +11286,8 @@ // CHECK2-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK2-NEXT: call void 
@__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -11517,7 +11517,7 @@ // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -11529,8 +11529,8 @@ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR1:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -11837,7 +11837,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32 -// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 @@ -11848,7 +11848,7 @@ // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -11857,8 +11857,8 @@ // CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 
[[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -12117,14 +12117,14 @@ // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -12315,7 +12315,7 @@ // CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -12324,8 +12324,8 @@ // CHECK3-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp @@ -70,24 +70,24 @@ } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l37( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) -// CHECK: call void 
@__kmpc_target_deinit({{.*}}, i1 true, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_for_static_fini( // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l43( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i1 true, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_for_static_fini( // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l48( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i1 true, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false) // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_for_static_fini( @@ -95,8 +95,8 @@ // CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l53({{.+}}, i{{32|64}} [[F_IN:%.+]]) // CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}}, -// CHECK: call i32 @__kmpc_target_init({{.*}}, i1 true, i1 false, i1 false) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i1 true, i1 false) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false) // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -899,7 +899,7 @@ // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -909,9 +909,9 @@ // CHECK1-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR1:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 4) -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret 
void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -939,7 +939,7 @@ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -949,9 +949,9 @@ // CHECK1-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 8) -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -979,7 +979,7 @@ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -989,9 +989,9 @@ // CHECK2-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR1:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1019,7 +1019,7 @@ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 
[[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1029,9 +1029,9 @@ // CHECK2-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1066,7 +1066,7 @@ // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -1076,9 +1076,9 @@ // CHECK3-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR1:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC3]], i64 4) -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1112,7 +1112,7 @@ // CHECK3-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -1122,9 +1122,9 @@ // CHECK3-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR1]] +// 
CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC2]], i64 8) -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1156,7 +1156,7 @@ // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -1166,9 +1166,9 @@ // CHECK4-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR1:[0-9]+]] +// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -1200,7 +1200,7 @@ // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -1210,9 +1210,9 @@ // CHECK4-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR1]] +// CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) -// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp 
b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -4215,7 +4215,7 @@ // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[E]], i64* [[E_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[E_ADDR]] to double* -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -4225,9 +4225,9 @@ // CHECK1-NEXT: store double [[TMP1]], double* [[E_ON_STACK]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E_ON_STACK]]) #[[ATTR1:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E_ON_STACK]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[E1]], i64 8) -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -4323,7 +4323,7 @@ // CHECK1: then: // CHECK1-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK1-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] @@ -4453,7 +4453,7 @@ // CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -4501,7 +4501,7 @@ // CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -4517,7 +4517,7 @@ // CHECK1-NEXT: store i64 [[D]], i64* [[D_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[C_ADDR]] to i8* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[D_ADDR]] to float* -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) 
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -4530,10 +4530,10 @@ // CHECK1-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i64 4) // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[C2]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -4664,7 +4664,7 @@ // CHECK1: then: // CHECK1-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK1-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] @@ -4825,7 +4825,7 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK1-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -4883,7 +4883,7 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK1-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -4899,14 +4899,14 @@ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[CONV]], i16* [[CONV1]]) #[[ATTR1]] -// CHECK1-NEXT: call 
void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK1-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[CONV]], i16* [[CONV1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -5118,7 +5118,7 @@ // CHECK1: then: // CHECK1-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK1-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] @@ -5296,7 +5296,7 @@ // CHECK1: then: // CHECK1-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK1-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] @@ -5463,7 +5463,7 @@ // CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -5523,7 +5523,7 @@ // CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -5537,7 +5537,7 @@ // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -5545,8 +5545,8 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[TMP0]], align 8 // CHECK2-NEXT: store double [[TMP3]], double* [[E1]], align 8 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E1]]) #[[ATTR1:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], 
double* [[E1]]) #[[ATTR4:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -5642,7 +5642,7 @@ // CHECK2: then: // CHECK2-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK2-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] @@ -5772,7 +5772,7 @@ // CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -5820,7 +5820,7 @@ // CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -5836,7 +5836,7 @@ // CHECK2-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[D_ADDR]] to float* -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -5849,10 +5849,10 @@ // CHECK2-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i32 4) // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[C2]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -5983,7 +5983,7 @@ // CHECK2: then: // CHECK2-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK2-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR1]] +// CHECK2-NEXT: call void 
@"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] @@ -6144,7 +6144,7 @@ // CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK2-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -6202,7 +6202,7 @@ // CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK2-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -6217,14 +6217,14 @@ // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK2-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR4]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -6436,7 +6436,7 @@ // CHECK2: then: // CHECK2-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK2-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] @@ -6614,7 +6614,7 @@ // CHECK2: then: // CHECK2-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK2-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] @@ -6781,7 +6781,7 @@ // CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 // 
CHECK2-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -6841,7 +6841,7 @@ // CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 // CHECK2-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -6855,7 +6855,7 @@ // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -6863,8 +6863,8 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[TMP0]], align 8 // CHECK3-NEXT: store double [[TMP3]], double* [[E1]], align 8 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E1]]) #[[ATTR1:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E1]]) #[[ATTR4:[0-9]+]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -6960,7 +6960,7 @@ // CHECK3: then: // CHECK3-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK3-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] @@ -7090,7 +7090,7 @@ // CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -7138,7 +7138,7 @@ // CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* 
[[TMP10]], i8* [[TMP9]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -7154,7 +7154,7 @@ // CHECK3-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[D_ADDR]] to float* -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -7167,10 +7167,10 @@ // CHECK3-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i32 4) // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[C2]], i32 1) -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -7301,7 +7301,7 @@ // CHECK3: then: // CHECK3-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK3-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] @@ -7462,7 +7462,7 @@ // CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -7520,7 +7520,7 @@ // CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -7535,14 +7535,14 @@ // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true) +// CHECK3-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR4]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -7754,7 +7754,7 @@ // CHECK3: then: // CHECK3-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK3-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] @@ -7932,7 +7932,7 @@ // CHECK3: then: // CHECK3-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* // CHECK3-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] @@ -8099,7 +8099,7 @@ // CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -8159,6 +8159,6 @@ // CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* // CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR4]] // CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp --- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -95,7 +95,7 @@ // CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast i8 addrspace(1)* [[TMP5]] to i8*, !dbg [[DBG47]] // CHECK1-NEXT: store i8* [[TMP6]], i8** [[_TMP2]], align 8, !dbg [[DBG47]] // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG47]] -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 
false, i1 true), !dbg [[DBG47]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG47]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG47]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG47]] // CHECK1: user_code.entry: @@ -117,7 +117,7 @@ // CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP18]], align 8, !dbg [[DBG48]] // CHECK1-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG48]] // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP19]], i64 4), !dbg [[DBG48]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB5:[0-9]+]], i1 true, i1 true), !dbg [[DBG49:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB5:[0-9]+]], i8 2, i1 true), !dbg [[DBG49:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG51:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG47]] @@ -316,7 +316,7 @@ // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast i8 addrspace(1)* [[TMP6]] to i8*, !dbg [[DBG137]] // CHECK1-NEXT: store i8* [[TMP7]], i8** [[_TMP2]], align 8, !dbg [[DBG137]] // CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB7:[0-9]+]], i1 true, i1 false, i1 true), !dbg [[DBG137]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB7:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG137]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG137]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG137]] // CHECK1: user_code.entry: @@ -338,7 +338,7 @@ // CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP19]], align 8, !dbg [[DBG138]] // CHECK1-NEXT: [[TMP20:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG138]] // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP20]], i64 4), !dbg [[DBG138]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB11:[0-9]+]], i1 true, i1 true), !dbg [[DBG139:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB11:[0-9]+]], i8 2, i1 true), !dbg [[DBG139:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG141:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG137]] @@ -531,7 +531,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8 addrspace(1)* [[TMP9]] to i8*, !dbg [[DBG212]] // CHECK1-NEXT: store i8* [[TMP10]], i8** [[_TMP3]], align 8, !dbg [[DBG212]] // CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[_TMP3]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB13:[0-9]+]], i1 true, i1 false, i1 true), !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB13:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG212]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], 
label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]] // CHECK1: user_code.entry: @@ -549,7 +549,7 @@ // CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP20]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG213]] // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i32*, [10 x [10 x i32]]*, i8*)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP21]], i64 4), !dbg [[DBG213]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB17:[0-9]+]], i1 true, i1 true), !dbg [[DBG214:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB17:[0-9]+]], i8 2, i1 true), !dbg [[DBG214:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG212]] diff --git a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp @@ -89,7 +89,7 @@ // CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast i8 addrspace(1)* [[TMP5]] to i8*, !dbg [[DBG41]] // CHECK1-NEXT: store i8* [[TMP6]], i8** [[_TMP2]], align 8, !dbg [[DBG41]] // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG41]] -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 false), !dbg [[DBG41]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false), !dbg [[DBG41]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]] // CHECK1: user_code.entry: @@ -114,7 +114,7 @@ // CHECK1-NEXT: [[TMP20:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]] // CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG42]] // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB6]], i32 [[TMP9]], i32 [[TMP20]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP21]], i64 4), !dbg [[DBG42]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB8:[0-9]+]], i1 true, i1 false), !dbg [[DBG45:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB8:[0-9]+]], i8 2, i1 false), !dbg [[DBG45:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG41]] @@ -392,7 +392,7 @@ // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast i8 addrspace(1)* [[TMP6]] to i8*, !dbg [[DBG146]] // CHECK1-NEXT: store i8* [[TMP7]], i8** [[_TMP2]], align 8, !dbg [[DBG146]] // CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB10:[0-9]+]], i1 true, i1 false, i1 false), !dbg [[DBG146]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB10:[0-9]+]], i8 2, i1 false, i1 false), !dbg [[DBG146]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label 
[[WORKER_EXIT:%.*]], !dbg [[DBG146]] // CHECK1: user_code.entry: @@ -414,7 +414,7 @@ // CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP19]], align 8, !dbg [[DBG147]] // CHECK1-NEXT: [[TMP20:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG147]] // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i64, [10 x [10 x i32]]*, i8*)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP20]], i64 4), !dbg [[DBG147]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB17:[0-9]+]], i1 true, i1 false), !dbg [[DBG148:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB17:[0-9]+]], i8 2, i1 false), !dbg [[DBG148:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG146]] @@ -680,7 +680,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast i8 addrspace(1)* [[TMP9]] to i8*, !dbg [[DBG236]] // CHECK1-NEXT: store i8* [[TMP10]], i8** [[_TMP3]], align 8, !dbg [[DBG236]] // CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[_TMP3]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB19:[0-9]+]], i1 true, i1 false, i1 false), !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB19:[0-9]+]], i8 2, i1 false, i1 false), !dbg [[DBG236]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]] // CHECK1: user_code.entry: @@ -698,7 +698,7 @@ // CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP20]], align 8, !dbg [[DBG237]] // CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**, !dbg [[DBG237]] // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x [10 x [10 x i32]]]*, i32*, [10 x [10 x i32]]*, i8*)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP21]], i64 4), !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB26:[0-9]+]], i1 true, i1 false), !dbg [[DBG238:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB26:[0-9]+]], i8 2, i1 false), !dbg [[DBG238:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG236]] diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -414,8 +414,8 @@ /* Int */ Int32, /* kmp_task_t */ VoidPtr) /// OpenMP Device runtime functions -__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int1, Int1, Int1) -__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int1, Int1) +__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1, Int1) +__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8, Int1) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2761,7 +2761,9 
@@ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); Value *Ident = getOrCreateIdent(SrcLocStr); - ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD); + ConstantInt *IsSPMDVal = ConstantInt::getSigned( + IntegerType::getInt8Ty(Int8->getContext()), + IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); ConstantInt *UseGenericStateMachine = ConstantInt::getBool(Int32->getContext(), !IsSPMD); ConstantInt *RequiresFullRuntimeVal = @@ -2812,7 +2814,9 @@ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); Value *Ident = getOrCreateIdent(SrcLocStr); - ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD); + ConstantInt *IsSPMDVal = ConstantInt::getSigned( + IntegerType::getInt8Ty(Int8->getContext()), + IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2580,10 +2580,9 @@ CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr; if (!CB) return false; - const int InitIsSPMDArgNo = 1; - auto *IsSPMDModeCI = - dyn_cast<ConstantInt>(CB->getOperand(InitIsSPMDArgNo)); - return IsSPMDModeCI && IsSPMDModeCI->isZero(); + const int InitModeArgNo = 1; + auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo)); + return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC); } if (C->isZero()) { @@ -2941,7 +2940,7 @@ return FalseVal; }; - Attributor::SimplifictionCallbackTy IsSPMDModeSimplifyCB = + Attributor::SimplifictionCallbackTy ModeSimplifyCB = [&](const IRPosition &IRP, const AbstractAttribute *AA, bool &UsedAssumedInformation) -> Optional<Value *> { // IRP represents the "SPMDCompatibilityTracker" argument of an @@ -2957,8 +2956,10 @@ } else { UsedAssumedInformation = false; } - auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(), - SPMDCompatibilityTracker.isAssumed()); + auto *Val = ConstantInt::getSigned( + IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()), + SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD + : OMP_TGT_EXEC_MODE_GENERIC); return Val; }; @@ -2983,8 +2984,8 @@ return Val; }; - constexpr const int InitIsSPMDArgNo = 1; - constexpr const int DeinitIsSPMDArgNo = 1; + constexpr const int InitModeArgNo = 1; + constexpr const int DeinitModeArgNo = 1; constexpr const int InitUseStateMachineArgNo = 2; constexpr const int InitRequiresFullRuntimeArgNo = 3; constexpr const int DeinitRequiresFullRuntimeArgNo = 2; @@ -2992,11 +2993,11 @@ IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo), StateMachineSimplifyCB); A.registerSimplificationCallback( - IRPosition::callsite_argument(*KernelInitCB, InitIsSPMDArgNo), - IsSPMDModeSimplifyCB); + IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo), + ModeSimplifyCB); A.registerSimplificationCallback( - IRPosition::callsite_argument(*KernelDeinitCB, DeinitIsSPMDArgNo), - IsSPMDModeSimplifyCB); + IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo), + ModeSimplifyCB); A.registerSimplificationCallback( IRPosition::callsite_argument(*KernelInitCB, InitRequiresFullRuntimeArgNo), @@ -3007,9 +3008,9 @@ IsGenericModeSimplifyCB); // Check if we know we are in SPMD-mode already. 
- ConstantInt *IsSPMDArg = - dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo)); - if (IsSPMDArg && !IsSPMDArg->isZero()) + ConstantInt *ModeArg = + dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo)); + if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) SPMDCompatibilityTracker.indicateOptimisticFixpoint(); // This is a generic region but SPMDization is disabled so stop tracking. else if (DisableOpenMPOptSPMDization) @@ -3298,21 +3299,24 @@ ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); // Next rewrite the init and deinit calls to indicate we use SPMD-mode now. - const int InitIsSPMDArgNo = 1; - const int DeinitIsSPMDArgNo = 1; + const int InitModeArgNo = 1; + const int DeinitModeArgNo = 1; const int InitUseStateMachineArgNo = 2; const int InitRequiresFullRuntimeArgNo = 3; const int DeinitRequiresFullRuntimeArgNo = 2; auto &Ctx = getAnchorValue().getContext(); - A.changeUseAfterManifest(KernelInitCB->getArgOperandUse(InitIsSPMDArgNo), - *ConstantInt::getBool(Ctx, 1)); + A.changeUseAfterManifest( + KernelInitCB->getArgOperandUse(InitModeArgNo), + *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), + OMP_TGT_EXEC_MODE_SPMD)); A.changeUseAfterManifest( KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *ConstantInt::getBool(Ctx, 0)); A.changeUseAfterManifest( - KernelDeinitCB->getArgOperandUse(DeinitIsSPMDArgNo), - *ConstantInt::getBool(Ctx, 1)); + KernelDeinitCB->getArgOperandUse(DeinitModeArgNo), + *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), + OMP_TGT_EXEC_MODE_SPMD)); A.changeUseAfterManifest( KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo), *ConstantInt::getBool(Ctx, 0)); @@ -3337,7 +3341,7 @@ assert(ReachedKnownParallelRegions.isValidState() && "Custom state machine with invalid parallel region states?"); - const int InitIsSPMDArgNo = 1; + const int InitModeArgNo = 1; const int InitUseStateMachineArgNo = 2; // Check if the current configuration is non-SPMD and generic state machine. @@ -3346,14 +3350,14 @@ // we give up. ConstantInt *UseStateMachine = dyn_cast<ConstantInt>( KernelInitCB->getArgOperand(InitUseStateMachineArgNo)); - ConstantInt *IsSPMD = - dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo)); + ConstantInt *Mode = + dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo)); // If we are stuck with generic mode, try to create a custom device (=GPU) // state machine which is specialized for the parallel regions that are // reachable by the kernel. - if (!UseStateMachine || UseStateMachine->isZero() || !IsSPMD || - !IsSPMD->isZero()) + if (!UseStateMachine || UseStateMachine->isZero() || !Mode || + (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) return ChangeStatus::UNCHANGED; // If not SPMD mode, indicate we use a custom state machine now. 
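
Note on the i8 constants threaded through the updated checks and the OpenMPOpt/OMPIRBuilder hunks above: the second argument of __kmpc_target_init/__kmpc_target_deinit now carries the target execution-mode flags instead of a plain IsSPMD boolean, which is why generic kernels check for "i8 1" and SPMD kernels for "i8 2" in the test diffs below. The following standalone C++ sketch is illustrative only; the flag names (OMP_TGT_EXEC_MODE_GENERIC/SPMD/GENERIC_SPMD) come from this patch, while the enum spelling and the helper functions are made up for the example and do not claim to match the upstream headers exactly.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative mirror of the execution-mode bit flags this patch relies on.
enum OMPTgtExecModeFlags : int8_t {
  OMP_TGT_EXEC_MODE_GENERIC = 1 << 0, // appears as "i8 1" in the updated checks
  OMP_TGT_EXEC_MODE_SPMD = 1 << 1,    // appears as "i8 2" in the updated checks
  OMP_TGT_EXEC_MODE_GENERIC_SPMD =
      OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD,
};

// The old i1 argument could only answer "is this SPMD?". Keeping the mode as
// bit flags answers the same question while leaving room for combined modes.
static bool isSPMDMode(int8_t Mode) { return Mode & OMP_TGT_EXEC_MODE_SPMD; }
static bool isGenericMode(int8_t Mode) { return Mode & OMP_TGT_EXEC_MODE_GENERIC; }

int main() {
  assert(!isSPMDMode(OMP_TGT_EXEC_MODE_GENERIC)); // i8 1: generic entry path
  assert(isSPMDMode(OMP_TGT_EXEC_MODE_SPMD));     // i8 2: SPMD entry path
  // A Generic-SPMD kernel satisfies both predicates, which is why the
  // optimizer tests individual bits rather than comparing for equality.
  assert(isSPMDMode(OMP_TGT_EXEC_MODE_GENERIC_SPMD) &&
         isGenericMode(OMP_TGT_EXEC_MODE_GENERIC_SPMD));
  std::printf("generic=%d spmd=%d generic_spmd=%d\n",
              (int)OMP_TGT_EXEC_MODE_GENERIC, (int)OMP_TGT_EXEC_MODE_SPMD,
              (int)OMP_TGT_EXEC_MODE_GENERIC_SPMD);
  return 0;
}

Encoding the mode as flags is what lets the optimizer test individual bits (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC above) and later introduce the combined Generic-SPMD mode without another runtime signature change.
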
diff --git a/llvm/test/Transforms/OpenMP/always_inline_device.ll b/llvm/test/Transforms/OpenMP/always_inline_device.ll --- a/llvm/test/Transforms/OpenMP/always_inline_device.ll +++ b/llvm/test/Transforms/OpenMP/always_inline_device.ll @@ -12,32 +12,32 @@ ; CHECK: Function Attrs: convergent norecurse nounwind ; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 true) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry call void @bar() #2 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry ret void } -declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) -declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) ; Function Attrs: convergent nounwind define hidden void @bar() #1 { diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -141,7 +141,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -149,14 +149,14 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry ret void } -declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { entry: @@ -190,14 +190,14 @@ declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3 -declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. 
= alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -205,7 +205,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__1(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry @@ -292,7 +292,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -300,7 +300,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry @@ -372,7 +372,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -380,7 +380,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry @@ -460,7 +460,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -468,7 +468,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__9(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry @@ -546,7 +546,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. 
= alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -554,7 +554,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__12(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry @@ -635,7 +635,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -643,7 +643,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__15(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry @@ -690,7 +690,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -698,7 +698,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__16(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry @@ -847,13 +847,13 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; AMDGPU-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -911,7 +911,7 @@ ; AMDGPU-NEXT: 
[[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: @@ -952,7 +952,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1036,7 +1036,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: @@ -1083,7 +1083,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1184,7 +1184,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: @@ -1227,7 +1227,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: 
store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1310,7 +1310,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: @@ -1351,7 +1351,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1434,7 +1434,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: @@ -1475,7 +1475,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1558,7 +1558,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; 
AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: @@ -1594,7 +1594,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1658,7 +1658,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: @@ -1688,7 +1688,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1817,13 +1817,13 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; NVPTX-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: call void 
@__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1881,7 +1881,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: @@ -1921,7 +1921,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2005,7 +2005,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: @@ -2051,7 +2051,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2152,7 +2152,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: @@ -2194,7 +2194,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = 
call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2277,7 +2277,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: @@ -2317,7 +2317,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2400,7 +2400,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: @@ -2440,7 +2440,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2523,7 +2523,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, 
align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: @@ -2558,7 +2558,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2622,7 +2622,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: @@ -2651,7 +2651,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2780,13 +2780,13 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* 
@[[GLOB1]], i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -2843,14 +2843,14 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -2933,14 +2933,14 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3040,14 +3040,14 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq 
i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3129,14 +3129,14 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3218,14 +3218,14 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; 
AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3307,13 +3307,13 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3376,13 +3376,13 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3511,13 +3511,13 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: 
[[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3574,14 +3574,14 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3664,14 +3664,14 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3771,14 +3771,14 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, 
align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3860,14 +3860,14 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3949,14 +3949,14 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call 
void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -4038,13 +4038,13 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -4107,13 +4107,13 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll --- a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll @@ -14,7 +14,7 @@ ;; unknown(); ;; } ;; } -;; +;; ;; void test_fallback(void) { ;; #pragma omp target teams ;; { @@ -60,7 
+60,7 @@ define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 { entry: %captured_vars_addrs.i.i = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) #3, !dbg !18 + %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3, !dbg !18 %exec_user_code = icmp eq i32 %0, -1, !dbg !18 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18 @@ -77,11 +77,11 @@ call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23 call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26 call void @unknown() #6, !dbg !27 - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i1 false, i1 true) #3, !dbg !28 + call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1, i1 true) #3, !dbg !28 br label %common.ret } -declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr ; Function Attrs: convergent declare void @unknown() local_unnamed_addr #1 @@ -99,13 +99,13 @@ ; Function Attrs: nounwind declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3 -declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr +declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr ; Function Attrs: norecurse nounwind define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 { entry: %captured_vars_addrs.i2.i = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i1 false, i1 true, i1 true) #3, !dbg !33 + %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true, i1 true) #3, !dbg !33 %exec_user_code = icmp eq i32 %0, -1, !dbg !33 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33 @@ -130,7 +130,7 @@ call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45 call void @no_openmp() call void @no_parallelism() - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i1 false, i1 true) #3, !dbg !46 + call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1, i1 true) #3, !dbg !46 br label %common.ret } diff --git a/llvm/test/Transforms/OpenMP/deduplication_target.ll b/llvm/test/Transforms/OpenMP/deduplication_target.ll --- a/llvm/test/Transforms/OpenMP/deduplication_target.ll +++ b/llvm/test/Transforms/OpenMP/deduplication_target.ll @@ -20,37 +20,37 @@ ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 true, i1 false, i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, 
i1 true) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; entry: %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 true, i1 false, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 false, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) %2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 true, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2, i1 true) ret void worker.exit: ; preds = %entry ret void } -declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #1 -declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } attributes #1 = { nounwind } diff --git a/llvm/test/Transforms/OpenMP/fold_generic_main_thread.ll b/llvm/test/Transforms/OpenMP/fold_generic_main_thread.ll --- a/llvm/test/Transforms/OpenMP/fold_generic_main_thread.ll +++ b/llvm/test/Transforms/OpenMP/fold_generic_main_thread.ll @@ -10,7 +10,7 @@ define void @kernel() { ; CHECK-LABEL: define {{[^@]+}}@kernel() { -; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 false) +; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 false) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: @@ -20,11 +20,11 @@ ; CHECK-NEXT: call void @bar() ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() { -; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 false) +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 false) ; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1 ; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK-DISABLED: if.then: @@ -34,10 +34,10 @@ ; CHECK-DISABLED-NEXT: call void @bar() ; CHECK-DISABLED-NEXT: br label [[IF_END]] ; CHECK-DISABLED: if.end: -; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true) +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) ; CHECK-DISABLED-NEXT: ret void ; - %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 false, i1 false) + %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false, i1 false) %cmp = icmp eq i32 %call, -1 br i1 %cmp, label %if.then, label %if.else 
if.then: @@ -47,7 +47,7 @@ call void @bar() br label %if.end if.end: - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) ret void } @@ -135,9 +135,9 @@ declare i32 @__kmpc_get_hardware_thread_id() -declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) -declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -18,18 +18,18 @@ define weak void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) ; CHECK-NEXT: call void @helper0() #[[ATTR1:[0-9]+]] ; CHECK-NEXT: call void @helper1() #[[ATTR1]] ; CHECK-NEXT: call void @helper2() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ret void } @@ -38,14 +38,14 @@ define weak void @kernel1() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) ; CHECK-NEXT: call void @helper1() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) call void @helper1() - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ret void } @@ -56,7 +56,7 @@ ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: @@ -68,12 +68,12 @@ ; CHECK-NEXT: call void @helper1() #[[ATTR1]] ; CHECK-NEXT: call void 
@helper2() #[[ATTR1]] ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0) -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: ret void ; entry: %captured_vars_addrs = alloca [0 x i8*], align 8 - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true, i1 true) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %i, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -87,7 +87,7 @@ call void @helper1() call void @helper2() call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0) - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) ret void } @@ -179,8 +179,8 @@ } declare i32 @__kmpc_get_hardware_num_threads_in_block() -declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext, i1 zeroext) #1 -declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext, i1 zeroext) #1 +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext) #1 +declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext) #1 declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) declare i32 @__kmpc_global_thread_num(%struct.ident_t*) diff --git a/llvm/test/Transforms/OpenMP/global_constructor.ll b/llvm/test/Transforms/OpenMP/global_constructor.ll --- a/llvm/test/Transforms/OpenMP/global_constructor.ll +++ b/llvm/test/Transforms/OpenMP/global_constructor.ll @@ -10,7 +10,7 @@ define weak void @__omp_offloading_fd02_85283c04_main_l11(double* nonnull align 8 dereferenceable(8) %X) local_unnamed_addr { entry: - %0 = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 true, i1 false, i1 false) #0 + %0 = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 2, i1 false, i1 false) #0 %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -29,13 +29,13 @@ region.barrier: tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @1, i32 %2) - tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i1 true, i1 false) #0 + tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 2, i1 false) #0 br label %common.ret } -declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr -declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr +declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr define internal void @__omp_offloading__fd02_85283c04_Device_l6_ctor() { entry: @@ -78,7 +78,7 @@ ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11 ; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i1 true, 
i1 false, i1 false) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) #[[ATTR1:[0-9]+]] ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: @@ -93,6 +93,6 @@ ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: ; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR1]] -; CHECK-NEXT: tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false) #[[ATTR1]] +; CHECK-NEXT: tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR1]] ; CHECK-NEXT: br label [[COMMON_RET]] ; diff --git a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll --- a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll +++ b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll @@ -48,7 +48,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -56,14 +56,14 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry ret void } -declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) declare void @unknown() define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { @@ -146,7 +146,7 @@ declare i32 @__kmpc_global_thread_num(%struct.ident_t*) -declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { entry: diff --git a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll --- a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll +++ b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll @@ -21,18 +21,18 @@ ;. 
define weak void @is_spmd() { ; CHECK-LABEL: define {{[^@]+}}@is_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) ; CHECK-NEXT: call void @is_spmd_helper1() ; CHECK-NEXT: call void @is_spmd_helper2() ; CHECK-NEXT: call void @is_mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) call void @is_spmd_helper1() call void @is_spmd_helper2() call void @is_mixed_helper() - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ret void } @@ -40,7 +40,7 @@ ; CHECK-LABEL: define {{[^@]+}}@will_be_spmd() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: @@ -50,12 +50,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @is_spmd_helper2() ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0) -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: ret void ; entry: %captured_vars_addrs = alloca [0 x i8*], align 8 - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true, i1 true) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %i, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -67,41 +67,41 @@ %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** call void @is_spmd_helper2() call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0) - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) ret void } define weak void @non_spmd() { ; CHECK-LABEL: define {{[^@]+}}@non_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) ; CHECK-NEXT: call void @is_generic_helper1() ; CHECK-NEXT: call void @is_generic_helper2() ; CHECK-NEXT: call void @is_mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) +; CHECK-NEXT: call void 
@__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) call void @is_generic_helper1() call void @is_generic_helper2() call void @is_mixed_helper() - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) ret void } define weak void @will_not_be_spmd() { ; CHECK-LABEL: define {{[^@]+}}@will_not_be_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) ; CHECK-NEXT: call void @is_generic_helper1() ; CHECK-NEXT: call void @is_generic_helper2() ; CHECK-NEXT: call void @is_mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) call void @is_generic_helper1() call void @is_generic_helper2() call void @is_mixed_helper() - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) ret void } @@ -197,8 +197,8 @@ declare void @spmd_compatible() "llvm.assume"="ompx_spmd_amenable" declare i8 @__kmpc_is_spmd_exec_mode() -declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext, i1 zeroext) -declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext, i1 zeroext) +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext) +declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext) declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) declare i32 @__kmpc_global_thread_num(%struct.ident_t*) declare void @foo() diff --git a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll --- a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll +++ b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll @@ -19,46 +19,46 @@ ;. 
 define weak void @none_spmd() {
 ; CHECK-LABEL: define {{[^@]+}}@none_spmd() {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
 ; CHECK-NEXT: call void @none_spmd_helper()
 ; CHECK-NEXT: call void @mixed_helper()
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
 ; CHECK-NEXT: ret void
 ;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false)
 call void @none_spmd_helper()
 call void @mixed_helper()
- call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false)
 ret void
 }
 define weak void @spmd() {
 ; CHECK-LABEL: define {{[^@]+}}@spmd() {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
 ; CHECK-NEXT: call void @spmd_helper()
 ; CHECK-NEXT: call void @mixed_helper()
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
 ; CHECK-NEXT: ret void
 ;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
 call void @spmd_helper()
 call void @mixed_helper()
- call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
 ret void
 }
 define weak void @parallel() {
 ; CHECK-LABEL: define {{[^@]+}}@parallel() {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* align 1073741824 null, i1 true, i1 false, i1 false)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* align 1073741824 null, i8 2, i1 false, i1 false)
 ; CHECK-NEXT: call void @spmd_helper()
 ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noalias noundef align 1073741824 null, i32 noundef 0, i32 noundef 0, i32 noundef 0, i32 noundef 0, i8* noalias noundef align 1073741824 null, i8* noalias noundef align 1073741824 null, i8** noalias noundef align 1073741824 null, i64 noundef 0)
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
 ; CHECK-NEXT: ret void
 ;
- %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false)
+ %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false)
 call void @spmd_helper()
 call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0)
- call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false)
+ call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false)
 ret void
 }
@@ -130,8 +130,8 @@
 declare void @foo()
 declare void @bar()
 declare i8 @__kmpc_parallel_level()
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext, i1 zeroext) #1
-declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext, i1 zeroext) #1
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8 zeroext, i1 zeroext, i1 zeroext) #1
+declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8 zeroext, i1 zeroext) #1
 !llvm.module.flags = !{!0, !1}
 !nvvm.annotations = !{!2, !3, !4}
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -14,8 +14,8 @@
 %struct.ident_t = type { i32, i32, i32, i32, i8* }
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
 ;.
 ; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
@@ -25,28 +25,28 @@
 define void @kernel() {
 ; CHECK-LABEL: define {{[^@]+}}@kernel() {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 false, i1 true)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true)
 ; CHECK-NEXT: call void @foo() #[[ATTR4:[0-9]+]]
 ; CHECK-NEXT: call void @bar() #[[ATTR4]]
 ; CHECK-NEXT: call void @unknown_no_openmp()
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
 ; CHECK-NEXT: ret void
 ;
 ; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() {
 ; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 false, i1 true)
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true)
 ; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR4:[0-9]+]]
 ; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR4]]
 ; CHECK-DISABLED-NEXT: call void @unknown_no_openmp()
-; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true)
+; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
 ; CHECK-DISABLED-NEXT: ret void
 ;
 entry:
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 true, i1 true)
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 true, i1 true)
 call void @foo()
 call void @bar()
 call void @unknown_no_openmp()
- call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
 ret void
 }
diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll
--- a/llvm/test/Transforms/OpenMP/replace_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll
@@ -17,30 +17,30 @@
 define dso_local void @foo() {
 entry:
- %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
+ %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
 %x = call i8* @__kmpc_alloc_shared(i64 4)
 call void @unknown_no_openmp()
 %x_on_stack = bitcast i8* %x to i32*
 %0 = bitcast i32* %x_on_stack to i8*
 call void @use(i8* %0)
 call void @__kmpc_free_shared(i8* %x, i64 4)
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
 ret void
 }
 define void @bar() {
- %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
+ %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
 call void @unknown_no_openmp()
 call void @baz()
 call void @qux()
 call void @negative_qux_spmd()
- call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
+ call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
 ret void
 }
 define internal void @baz() {
 entry:
- %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 false, i1 true)
+ %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false, i1 true)
 call void @unknown_no_openmp()
 %cmp = icmp eq i32 %call, -1
 br i1 %cmp, label %master, label %exit
@@ -57,7 +57,7 @@
 define internal void @qux() {
 entry:
- %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true)
+ %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true)
 call void @unknown_no_openmp()
 %0 = icmp eq i32 %call, -1
 br i1 %0, label %master, label %exit
@@ -74,7 +74,7 @@
 define internal void @negative_qux_spmd() {
 entry:
- %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 true, i1 true, i1 true)
+ %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 2, i1 true, i1 true)
 call void @unknown_no_openmp()
 %0 = icmp eq i32 %call, -1
 br i1 %0, label %master, label %exit
@@ -106,9 +106,9 @@
 declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
 declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
@@ -138,28 +138,28 @@
 ;.
 ; CHECK-LABEL: define {{[^@]+}}@foo() {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true)
+; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
 ; CHECK-NEXT: [[X:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR4:[0-9]+]]
 ; CHECK-NEXT: call void @unknown_no_openmp()
 ; CHECK-NEXT: call void @use.internalized(i8* nofree writeonly [[X]]) #[[ATTR5:[0-9]+]]
 ; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR4]]
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
 ; CHECK-NEXT: ret void
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@bar() {
-; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true)
+; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true)
 ; CHECK-NEXT: call void @unknown_no_openmp()
 ; CHECK-NEXT: call void @baz()
 ; CHECK-NEXT: call void @qux()
 ; CHECK-NEXT: call void @negative_qux_spmd()
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
 ; CHECK-NEXT: ret void
 ;
 ;
 ; CHECK-LABEL: define {{[^@]+}}@baz() {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true)
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i8 noundef 1, i1 noundef false, i1 noundef true)
 ; CHECK-NEXT: call void @unknown_no_openmp()
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
 ; CHECK-NEXT: br i1 [[CMP]], label [[MASTER:%.*]], label [[EXIT:%.*]]
@@ -172,7 +172,7 @@
 ;
 ; CHECK-LABEL: define {{[^@]+}}@qux() {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i1 noundef false, i1 noundef true, i1 noundef true)
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i8 noundef 1, i1 noundef true, i1 noundef true)
 ; CHECK-NEXT: call void @unknown_no_openmp()
 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[CALL]], -1
 ; CHECK-NEXT: br i1 [[TMP0]], label [[MASTER:%.*]], label [[EXIT:%.*]]
@@ -185,7 +185,7 @@
 ;
 ; CHECK-LABEL: define {{[^@]+}}@negative_qux_spmd() {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i1 noundef true, i1 noundef true, i1 noundef true)
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noundef nonnull @[[GLOB1]], i8 noundef 2, i1 noundef true, i1 noundef true)
 ; CHECK-NEXT: call void @unknown_no_openmp()
 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[CALL]], -1
 ; CHECK-NEXT: br i1 [[TMP0]], label [[MASTER:%.*]], label [[EXIT:%.*]]
diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
--- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
+++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
@@ -14,7 +14,7 @@
 ; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread.
 ; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread.
define void @kernel() { - %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 false, i1 false) + %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false, i1 false) %cmp = icmp eq i32 %call, -1 br i1 %cmp, label %if.then, label %if.else if.then: @@ -22,7 +22,7 @@ if.else: br label %if.end if.end: - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) ret void } @@ -105,9 +105,9 @@ declare void @__kmpc_kernel_prepare_parallel(i8*) -declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) -declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) attributes #0 = { cold noinline } diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -147,14 +147,14 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -164,14 +164,14 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: ret void ; 
NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -183,7 +183,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: @@ -220,7 +220,7 @@ ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -232,7 +232,7 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: @@ -268,7 +268,7 @@ ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -277,7 +277,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. 
= alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -285,14 +285,14 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry ret void } -declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { ; @@ -322,7 +322,7 @@ ; AMDGPU-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; AMDGPU: for.end: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ @@ -350,7 +350,7 @@ ; NVPTX-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; NVPTX: for.end: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ @@ -378,7 +378,7 @@ ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; AMDGPU-DISABLED: for.end: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ @@ -406,7 +406,7 @@ ; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; NVPTX-DISABLED: for.end: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -450,7 +450,7 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 @@ -458,7 +458,7 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] +; NVPTX-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 @@ -466,7 +466,7 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void ; ; 
NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 @@ -474,7 +474,7 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -568,7 +568,7 @@ declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3 -declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20() #0 { ; @@ -578,14 +578,14 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -595,14 +595,14 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -614,7 +614,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* 
@[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: @@ -651,7 +651,7 @@ ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -663,7 +663,7 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: @@ -699,7 +699,7 @@ ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -708,7 +708,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. 
= alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -716,7 +716,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__2(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry @@ -734,7 +734,7 @@ ; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 ; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* -; AMDGPU-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] +; AMDGPU-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] ; AMDGPU-NEXT: store i32 0, i32* [[I]], align 4 ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU: for.cond: @@ -752,7 +752,7 @@ ; AMDGPU-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] ; AMDGPU: for.end: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 @@ -765,7 +765,7 @@ ; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 ; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* -; NVPTX-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] +; NVPTX-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] ; NVPTX-NEXT: store i32 0, i32* [[I]], align 4 ; NVPTX-NEXT: br label [[FOR_COND:%.*]] ; NVPTX: for.cond: @@ -783,7 +783,7 @@ ; NVPTX-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] ; NVPTX: for.end: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 @@ -796,7 +796,7 @@ ; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 ; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* -; AMDGPU-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] +; AMDGPU-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[I]], align 4 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED: for.cond: @@ -814,7 +814,7 @@ ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] ; AMDGPU-DISABLED: for.end: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 @@ -827,7 +827,7 @@ ; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 ; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* 
[[TMP0]] to i32* -; NVPTX-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] +; NVPTX-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] ; NVPTX-DISABLED-NEXT: store i32 0, i32* [[I]], align 4 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; NVPTX-DISABLED: for.cond: @@ -845,7 +845,7 @@ ; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] ; NVPTX-DISABLED: for.end: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -897,7 +897,7 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 @@ -905,7 +905,7 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: call void @unknown() #[[ATTR7]] +; NVPTX-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 @@ -913,7 +913,7 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 @@ -921,7 +921,7 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1015,14 +1015,14 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1032,14 +1032,14 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; 
NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1051,7 +1051,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: @@ -1088,7 +1088,7 @@ ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -1100,7 +1100,7 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: @@ -1136,7 +1136,7 @@ ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; 
NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -1145,7 +1145,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -1153,7 +1153,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry @@ -1190,7 +1190,7 @@ ; AMDGPU-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; AMDGPU: for.end: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 @@ -1220,7 +1220,7 @@ ; NVPTX-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; NVPTX: for.end: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 @@ -1250,7 +1250,7 @@ ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; AMDGPU-DISABLED: for.end: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 @@ -1280,7 +1280,7 @@ ; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; NVPTX-DISABLED: for.end: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1335,7 +1335,7 @@ ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; AMDGPU-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 @@ -1348,7 +1348,7 @@ ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; NVPTX-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; NVPTX-NEXT: call void @unknown() #[[ATTR7]] +; NVPTX-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 @@ -1361,7 +1361,7 @@ ; 
AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 @@ -1374,7 +1374,7 @@ ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1492,14 +1492,14 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1509,14 +1509,14 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1528,7 +1528,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: @@ -1565,7 +1565,7 @@ ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -1577,7 +1577,7 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: @@ -1613,7 +1613,7 @@ ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -1622,7 +1622,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. 
= alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -1630,7 +1630,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry @@ -1682,7 +1682,7 @@ ; AMDGPU-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] ; AMDGPU: for.end: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 @@ -1727,7 +1727,7 @@ ; NVPTX-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] ; NVPTX: for.end: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 @@ -1759,7 +1759,7 @@ ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] ; AMDGPU-DISABLED: for.end: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 @@ -1791,7 +1791,7 @@ ; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] ; NVPTX-DISABLED: for.end: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1847,7 +1847,7 @@ ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; AMDGPU-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 @@ -1860,7 +1860,7 @@ ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; NVPTX-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; NVPTX-NEXT: call void @unknown() #[[ATTR7]] +; NVPTX-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 @@ -1873,7 +1873,7 @@ ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 @@ -1886,7 +1886,7 @@ ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; 
NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -2005,7 +2005,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: @@ -2035,7 +2035,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -2046,7 +2046,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: @@ -2075,7 +2075,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2087,7 +2087,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: @@ -2117,7 +2117,7 @@ 
; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -2129,7 +2129,7 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: @@ -2158,7 +2158,7 @@ ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -2167,7 +2167,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. 
= alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -2175,7 +2175,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__8(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void worker.exit: ; preds = %entry @@ -2190,7 +2190,7 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 @@ -2198,7 +2198,7 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-NEXT: call void @unknown() #[[ATTR7]] +; NVPTX-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 @@ -2206,7 +2206,7 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 @@ -2214,7 +2214,7 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -2261,36 +2261,40 @@ ; AMDGPU: attributes #[[ATTR2:[0-9]+]] = { alwaysinline } ; AMDGPU: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; AMDGPU: attributes #[[ATTR4]] = { nounwind } -; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { convergent nounwind } -; AMDGPU: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; AMDGPU: attributes #[[ATTR7]] = { convergent } +; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } +; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind } +; AMDGPU: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; AMDGPU: attributes #[[ATTR8]] = { convergent } ;. 
; NVPTX: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; NVPTX: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; NVPTX: attributes #[[ATTR2:[0-9]+]] = { alwaysinline } ; NVPTX: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; NVPTX: attributes #[[ATTR4]] = { nounwind } -; NVPTX: attributes #[[ATTR5:[0-9]+]] = { convergent nounwind } -; NVPTX: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; NVPTX: attributes #[[ATTR7]] = { convergent } +; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } +; NVPTX: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind } +; NVPTX: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; NVPTX: attributes #[[ATTR8]] = { convergent } ;. ; AMDGPU-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; AMDGPU-DISABLED: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; AMDGPU-DISABLED: attributes #[[ATTR2:[0-9]+]] = { alwaysinline } ; AMDGPU-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; AMDGPU-DISABLED: attributes #[[ATTR4]] = { nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { convergent nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; AMDGPU-DISABLED: attributes #[[ATTR7]] = { convergent } +; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent } ;. 
; NVPTX-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; NVPTX-DISABLED: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; NVPTX-DISABLED: attributes #[[ATTR2:[0-9]+]] = { alwaysinline }
; NVPTX-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
; NVPTX-DISABLED: attributes #[[ATTR4]] = { nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { convergent nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
-; NVPTX-DISABLED: attributes #[[ATTR7]] = { convergent }
+; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
+; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind }
+; NVPTX-DISABLED: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
+; NVPTX-DISABLED: attributes #[[ATTR8]] = { convergent }
;.
; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop", i32 5, i32 0}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
--- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
@@ -31,7 +31,7 @@
; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false, i1 false) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false, i1 false) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
@@ -55,12 +55,12 @@
; CHECK: region.exit:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull [[TMP4]], i64 0) #[[ATTR3]]
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false) #[[ATTR3]]
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%captured_vars_addrs = alloca [0 x i8*], align 8
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) #3
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -73,11 +73,11 @@
store double %call.i, double* %x, align 8, !tbaa !8
%2 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0
call void @__kmpc_parallel_51(%struct.ident_t* nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull %2, i64 0) #3
- call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i1 false, i1 true) #3
+ call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1, i1 true) #3
br label %common.ret
}
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr
; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn
define internal void @__omp_outlined__(i32* noalias nocapture %.global_tid., i32* noalias nocapture %.bound_tid.) #1 {
@@ -113,7 +113,7 @@
; Function Attrs: alwaysinline
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr #4
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr
; Function Attrs: convergent
declare double @__nv_sin(double) local_unnamed_addr #5
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -59,7 +59,7 @@
; CHECK-SAME: (i32* [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false, i1 false) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false, i1 false) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
@@ -166,7 +166,7 @@
; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
-; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false) #[[ATTR4]]
+; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR4]]
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
@@ -176,7 +176,7 @@
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; CHECK-DISABLED-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i1 false, i1 false, i1 true) #[[ATTR4:[0-9]+]]
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 1, i1 false, i1 true) #[[ATTR4:[0-9]+]]
; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-DISABLED: worker_state_machine.begin:
@@ -245,14 +245,14 @@
; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
-; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i1 false, i1 true) #[[ATTR4]]
+; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 1, i1 true) #[[ATTR4]]
; CHECK-DISABLED-NEXT: ret void
; CHECK-DISABLED: worker.exit:
; CHECK-DISABLED-NEXT: ret void
;
entry:
%N.addr.sroa.0.0.extract.trunc = trunc i64 %N to i32
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) #3
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -298,7 +298,7 @@
%call14.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
%call15.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
%call16.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8
- call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i1 false, i1 true) #3
+ call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1, i1 true) #3
ret void
worker.exit: ; preds = %entry
@@ -330,7 +330,7 @@
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)
; Function Attrs: convergent
declare i32 @no_openmp(i32*) #1
@@ -341,7 +341,7 @@
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)
; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn
declare void @llvm.experimental.noalias.scope.decl(metadata) #4
diff --git a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
--- a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
@@ -16,7 +16,7 @@
;; unknown();
;; }
;; }
-;; 
+;;
;; void test_fallback(void) {
;; #pragma omp target teams
;; {
@@ -62,7 +62,7 @@
define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x i8*], align 8
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) #3, !dbg !18
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3, !dbg !18
%exec_user_code = icmp eq i32 %0, -1, !dbg !18
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
@@ -79,11 +79,11 @@
call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26
call void @unknown() #6, !dbg !27
- call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i1 false, i1 true) #3, !dbg !28
+ call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1, i1 true) #3, !dbg !28
br label %common.ret
}
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr
+declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr
; Function Attrs: convergent
declare void @unknown() local_unnamed_addr #1
@@ -101,13 +101,13 @@
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr
+declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr
; Function Attrs: norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x i8*], align 8
- %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i1 false, i1 true, i1 true) #3, !dbg !33
+ %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true, i1 true) #3, !dbg !33
%exec_user_code = icmp eq i32 %0, -1, !dbg !33
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
@@ -131,7 +131,7 @@
call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %6, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !43
call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45
call void @spmd_amenable()
- call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i1 false, i1 true) #3, !dbg !46
+ call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1, i1 true) #3, !dbg !46
br label %common.ret
}
diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h
--- a/openmp/libomptarget/DeviceRTL/include/Interface.h
+++ b/openmp/libomptarget/DeviceRTL/include/Interface.h
@@ -206,10 +206,10 @@
///{
int8_t __kmpc_is_spmd_exec_mode();
-int32_t __kmpc_target_init(IdentTy *Ident, bool IsSPMD,
+int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
bool UseGenericStateMachine, bool);
-void __kmpc_target_deinit(IdentTy *Ident, bool IsSPMD, bool);
+void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool);
///}
diff --git a/openmp/libomptarget/DeviceRTL/include/Types.h b/openmp/libomptarget/DeviceRTL/include/Types.h
--- a/openmp/libomptarget/DeviceRTL/include/Types.h
+++ b/openmp/libomptarget/DeviceRTL/include/Types.h
@@ -178,6 +178,11 @@
KMP_ALLOCATOR_MAX_HANDLE = ~(0U)
} omp_allocator_handle_t;
+enum OMPTgtExecModeFlags : int8_t {
+ OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
+ OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
+};
+
#define __PRAGMA(STR) _Pragma(#STR)
#define OMP_PRAGMA(STR) __PRAGMA(omp STR)
diff --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
--- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
@@ -64,8 +64,9 @@
///
/// \param Ident Source location identification, can be NULL.
///
-int32_t __kmpc_target_init(IdentTy *Ident, bool IsSPMD,
+int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
bool UseGenericStateMachine, bool) {
+ const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
if (IsSPMD) {
inititializeRuntime(/* IsSPMD */ true);
synchronize::threads();
@@ -96,7 +97,8 @@
///
/// \param Ident Source location identification, can be NULL.
///
-void __kmpc_target_deinit(IdentTy *Ident, bool IsSPMD, bool) {
+void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool) {
+ const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
state::assumeInitialState(IsSPMD);
if (IsSPMD)
return;
diff --git a/openmp/libomptarget/deviceRTLs/common/include/target.h b/openmp/libomptarget/deviceRTLs/common/include/target.h
--- a/openmp/libomptarget/deviceRTLs/common/include/target.h
+++ b/openmp/libomptarget/deviceRTLs/common/include/target.h
@@ -36,7 +36,7 @@
///
/// \code
/// void kernel(...) {
-///   ThreadKind = __kmpc_target_init(Ident, /* IsSPMD */ false,
+///   ThreadKind = __kmpc_target_init(Ident, /* Mode */ 1,
///                                   /* UseGenericStateMachine */ true,
///                                   /* RequiresFullRuntime */ ... );
///   if (ThreadKind == -1) {
@@ -50,7 +50,7 @@
///
/// \code
/// void kernel(...) {
-///   ThreadKind = __kmpc_target_init(Ident, /* IsSPMD */ false,
+///   ThreadKind = __kmpc_target_init(Ident, /* Mode */ 1,
///                                   /* UseGenericStateMachine */ false,
///                                   /* RequiresFullRuntime */ ... );
///   if (ThreadKind == -1) {
@@ -72,7 +72,7 @@
///
/// \param Ident Source location identification, can be NULL.
///
-int32_t __kmpc_target_init(ident_t *Ident, bool IsSPMD,
+int32_t __kmpc_target_init(ident_t *Ident, int8_t Mode,
bool UseGenericStateMachine,
bool RequiresFullRuntime);
@@ -86,7 +86,7 @@
///
/// \param Ident Source location identification, can be NULL.
///
-void __kmpc_target_deinit(ident_t *Ident, bool IsSPMD,
+void __kmpc_target_deinit(ident_t *Ident, int8_t Mode,
bool RequiresFullRuntime);
///}
diff --git a/openmp/libomptarget/deviceRTLs/common/omptarget.h b/openmp/libomptarget/deviceRTLs/common/omptarget.h
--- a/openmp/libomptarget/deviceRTLs/common/omptarget.h
+++ b/openmp/libomptarget/deviceRTLs/common/omptarget.h
@@ -256,7 +256,7 @@
extern omptarget_nvptx_ThreadPrivateContext *
EXTERN_SHARED(omptarget_nvptx_threadPrivateContext);
-extern uint32_t EXTERN_SHARED(execution_param);
+extern int8_t EXTERN_SHARED(execution_param);
extern void *EXTERN_SHARED(ReductionScratchpadPtr);
////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
--- a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
@@ -56,7 +56,7 @@
////////////////////////////////////////////////////////////////////////////////
// OpenMP kernel execution parameters
////////////////////////////////////////////////////////////////////////////////
-uint32_t SHARED(execution_param);
+int8_t SHARED(execution_param);
////////////////////////////////////////////////////////////////////////////////
// Scratchpad for teams reduction.
diff --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
--- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
@@ -38,7 +38,7 @@
if (threadIdInBlock != GetMasterThreadID())
return;
- setExecutionParameters(Generic, RuntimeInitialized);
+ setExecutionParameters(OMP_TGT_EXEC_MODE_GENERIC, OMP_TGT_RUNTIME_INITIALIZED);
ASSERT0(LT_FUSSY, threadIdInBlock == GetMasterThreadID(),
"__kmpc_kernel_init() must be called by team master warp only!");
PRINT0(LD_IO, "call to __kmpc_kernel_init for master\n");
@@ -85,8 +85,9 @@
static void __kmpc_spmd_kernel_init(bool RequiresFullRuntime) {
PRINT0(LD_IO, "call to __kmpc_spmd_kernel_init\n");
- setExecutionParameters(Spmd, RequiresFullRuntime ? RuntimeInitialized
-                                                  : RuntimeUninitialized);
+ setExecutionParameters(OMP_TGT_EXEC_MODE_SPMD,
+                        RequiresFullRuntime ? OMP_TGT_RUNTIME_INITIALIZED
+                                            : OMP_TGT_RUNTIME_UNINITIALIZED);
int threadId = __kmpc_get_hardware_thread_id_in_block();
if (threadId == 0) {
usedSlotIdx = __kmpc_impl_smid() % MAX_SM;
@@ -160,7 +161,7 @@
// Return true if the current target region is executed in SPMD mode.
EXTERN int8_t __kmpc_is_spmd_exec_mode() {
- return (execution_param & ModeMask) == Spmd;
+ return execution_param & OMP_TGT_EXEC_MODE_SPMD;
}
EXTERN int8_t __kmpc_is_generic_main_thread(kmp_int32 Tid) {
@@ -202,9 +203,10 @@
}
EXTERN
-int32_t __kmpc_target_init(ident_t *Ident, bool IsSPMD,
+int32_t __kmpc_target_init(ident_t *Ident, int8_t Mode,
bool UseGenericStateMachine,
bool RequiresFullRuntime) {
+ const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
int TId = __kmpc_get_hardware_thread_id_in_block();
if (IsSPMD)
__kmpc_spmd_kernel_init(RequiresFullRuntime);
@@ -226,13 +228,13 @@
}
EXTERN
-void __kmpc_target_deinit(ident_t *Ident, bool IsSPMD,
-                          bool RequiresFullRuntime) {
+void __kmpc_target_deinit(ident_t *Ident, int8_t Mode,
+                          bool RequiresFullRuntime) {
+ const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD;
if (IsSPMD)
__kmpc_spmd_kernel_deinit(RequiresFullRuntime);
else
__kmpc_generic_kernel_deinit();
}
-
#pragma omp end declare target
diff --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu
--- a/openmp/libomptarget/deviceRTLs/common/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu
@@ -19,19 +19,18 @@
// Execution Parameters
////////////////////////////////////////////////////////////////////////////////
-void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
+void setExecutionParameters(OMPTgtExecModeFlags EMode,
+                            OMPTgtRuntimeModeFlags RMode) {
execution_param = EMode;
execution_param |= RMode;
}
-bool isGenericMode() { return (execution_param & ModeMask) == Generic; }
+bool isGenericMode() { return execution_param & OMP_TGT_EXEC_MODE_GENERIC; }
-bool isRuntimeUninitialized() {
-  return (execution_param & RuntimeMask) == RuntimeUninitialized;
-}
+bool isRuntimeUninitialized() { return !isRuntimeInitialized(); }
bool isRuntimeInitialized() {
-  return (execution_param & RuntimeMask) == RuntimeInitialized;
+  return execution_param & OMP_TGT_RUNTIME_INITIALIZED;
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/common/support.h b/openmp/libomptarget/deviceRTLs/common/support.h
--- a/openmp/libomptarget/deviceRTLs/common/support.h
+++ b/openmp/libomptarget/deviceRTLs/common/support.h
@@ -19,19 +19,18 @@
////////////////////////////////////////////////////////////////////////////////
// Execution Parameters
////////////////////////////////////////////////////////////////////////////////
-enum ExecutionMode {
-  Spmd = 0x00u,
-  Generic = 0x01u,
-  ModeMask = 0x01u,
+enum OMPTgtExecModeFlags : int8_t {
+  OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
+  OMP_TGT_EXEC_MODE_SPMD = 1 << 1
};
-enum RuntimeMode {
-  RuntimeInitialized = 0x00u,
-  RuntimeUninitialized = 0x02u,
-  RuntimeMask = 0x02u,
+enum OMPTgtRuntimeModeFlags : int8_t {
+  OMP_TGT_RUNTIME_UNINITIALIZED = 0,
+  OMP_TGT_RUNTIME_INITIALIZED = 1 << 2
};
-void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode);
+void setExecutionParameters(OMPTgtExecModeFlags EMode,
+                            OMPTgtRuntimeModeFlags RMode);
bool isGenericMode();
bool isRuntimeUninitialized();
bool isRuntimeInitialized();
diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h
--- a/openmp/libomptarget/deviceRTLs/interface.h
+++ b/openmp/libomptarget/deviceRTLs/interface.h
@@ -416,10 +416,10 @@
int32_t cancelVal);
// non standard
-EXTERN int32_t __kmpc_target_init(ident_t *Ident, bool IsSPMD,
+EXTERN int32_t __kmpc_target_init(ident_t *Ident, int8_t Mode,
bool UseGenericStateMachine,
bool RequiresFullRuntime);
-EXTERN void __kmpc_target_deinit(ident_t *Ident, bool IsSPMD,
+EXTERN void __kmpc_target_deinit(ident_t *Ident, int8_t Mode,
bool RequiresFullRuntime);
EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn);
EXTERN bool __kmpc_kernel_parallel(void **WorkFn);