diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -795,7 +795,7 @@ emitGenericVarsEpilog(CGF); CGBuilderTy &Bld = CGF.Builder; - OMPBuilder.createTargetDeinit(Bld, IsSPMD); + OMPBuilder.createTargetDeinit(Bld); } void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, @@ -833,24 +833,6 @@ IsInTTDRegion = false; } -// Create a unique global variable to indicate the execution mode of this target -// region. The execution mode is either 'generic', or 'spmd' depending on the -// target directive. This variable is picked up by the offload library to setup -// the device appropriately before kernel launch. If the execution mode is -// 'generic', the runtime reserves one warp for the master, otherwise, all -// warps participate in parallel work. -static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, - bool Mode) { - auto *GVMode = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::WeakAnyLinkage, - llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD - : OMP_TGT_EXEC_MODE_GENERIC), - Twine(Name, "_exec_mode")); - GVMode->setVisibility(llvm::GlobalVariable::ProtectedVisibility); - CGM.addCompilerUsedGlobal(GVMode); -} - void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, @@ -867,8 +849,6 @@ else emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); - - setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode); } CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) diff --git a/clang/test/OpenMP/amdgcn_target_codegen.cpp b/clang/test/OpenMP/amdgcn_target_codegen.cpp --- a/clang/test/OpenMP/amdgcn_target_codegen.cpp +++ b/clang/test/OpenMP/amdgcn_target_codegen.cpp @@ -37,7 +37,7 @@ // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr // CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 1, i1 true) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14_kernel_environment to ptr)) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -61,7 +61,7 @@ // CHECK: worker.exit: // CHECK-NEXT: ret void // CHECK: for.end: -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1) +// CHECK-NEXT: call void @__kmpc_target_deinit() // CHECK-NEXT: ret void // // @@ -78,7 +78,7 @@ // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr // CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2, i1 false) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23_kernel_environment to ptr)) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -109,6 +109,6 @@ // CHECK-NEXT: ret void // CHECK: omp.inner.for.end: // CHECK-NEXT: store i32 1000, ptr [[I_ASCAST]], align 4 -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) +// CHECK-NEXT: call void @__kmpc_target_deinit() // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/amdgcn_target_device_vla.cpp b/clang/test/OpenMP/amdgcn_target_device_vla.cpp --- a/clang/test/OpenMP/amdgcn_target_device_vla.cpp +++ b/clang/test/OpenMP/amdgcn_target_device_vla.cpp @@ -111,7 +111,7 @@ // CHECK-NEXT: [[I1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I1]] to ptr // CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 1, i1 true) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo1v_l12_kernel_environment to ptr)) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -169,7 +169,7 @@ // CHECK-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK: for.end9: // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 [[TMP7]]) -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1) +// CHECK-NEXT: call void @__kmpc_target_deinit() // CHECK-NEXT: ret void // // @@ -193,18 +193,18 @@ // CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2, i1 false) +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l30_kernel_environment to ptr)) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: -// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP4]], ptr [[M_CASTED_ASCAST]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[M_CASTED_ASCAST]], align 8 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l30_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]]) #[[ATTR5:[0-9]+]] -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) +// CHECK-NEXT: call void @__kmpc_target_deinit() // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void @@ -559,7 +559,7 @@ // CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1, i1 true) +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_kernel_environment to ptr)) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -570,7 +570,7 @@ // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]]) #[[ATTR5]] -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1) +// CHECK-NEXT: call void @__kmpc_target_deinit() // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void @@ -918,7 +918,7 @@ // CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1, i1 true) +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_kernel_environment to ptr)) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -932,7 +932,7 @@ // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]]) #[[ATTR5]] -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1) +// CHECK-NEXT: call void @__kmpc_target_deinit() // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c --- a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c +++ b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c @@ -33,11 +33,11 @@ // CHECK-AMD-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr // CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 // CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false) +// CHECK-AMD-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14_kernel_environment to ptr)) // CHECK-AMD-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-AMD-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-AMD: user_code.entry: -// CHECK-AMD-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// CHECK-AMD-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) // CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 // CHECK-AMD-NEXT: store i32 [[TMP2]], ptr [[N_CASTED_ASCAST]], align 4 // CHECK-AMD-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 @@ -45,7 +45,7 @@ // CHECK-AMD-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 // CHECK-AMD-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 // CHECK-AMD-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP3]], ptr [[TMP4]]) #[[ATTR2:[0-9]+]] -// CHECK-AMD-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) +// CHECK-AMD-NEXT: call void @__kmpc_target_deinit() // CHECK-AMD-NEXT: ret void // CHECK-AMD: worker.exit: // CHECK-AMD-NEXT: ret void diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp --- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -31,15 +31,15 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp --- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -280,11 +280,11 @@ // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-64-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -293,7 +293,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -694,7 +694,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -702,7 +702,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -885,7 +885,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -893,7 +893,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -1067,7 +1067,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1075,7 +1075,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -1256,7 +1256,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1264,7 +1264,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -1445,7 +1445,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1453,7 +1453,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -1634,7 +1634,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1642,7 +1642,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -1826,7 +1826,7 @@ // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1837,7 +1837,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2029,7 +2029,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2037,7 +2037,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2206,7 +2206,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2214,7 +2214,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2374,7 +2374,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2382,7 +2382,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2549,7 +2549,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2557,7 +2557,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2724,7 +2724,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2732,7 +2732,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2899,7 +2899,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2907,7 +2907,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -3074,7 +3074,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -3082,7 +3082,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -3257,7 +3257,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -3265,7 +3265,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -3450,7 +3450,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -3458,7 +3458,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -3636,7 +3636,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -3644,7 +3644,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -3825,7 +3825,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -3833,7 +3833,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -4014,7 +4014,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -4022,7 +4022,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -4203,7 +4203,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -4211,7 +4211,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -4392,7 +4392,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -4400,7 +4400,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -4560,7 +4560,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -4568,7 +4568,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -4737,7 +4737,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -4745,7 +4745,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -4905,7 +4905,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -4913,7 +4913,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -5080,7 +5080,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -5088,7 +5088,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -5255,7 +5255,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -5263,7 +5263,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -5430,7 +5430,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -5438,7 +5438,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -5605,7 +5605,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -5613,7 +5613,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -5773,7 +5773,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -5781,7 +5781,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -5950,7 +5950,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -5958,7 +5958,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -6118,7 +6118,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -6126,7 +6126,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -6293,7 +6293,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -6301,7 +6301,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -6468,7 +6468,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -6476,7 +6476,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -6643,7 +6643,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -6651,7 +6651,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -6819,7 +6819,7 @@ // CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 // CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -6828,7 +6828,7 @@ // CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -6915,13 +6915,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -6991,13 +6991,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7084,13 +7084,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7156,13 +7156,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7228,13 +7228,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7300,13 +7300,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7374,7 +7374,7 @@ // CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 // CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -7383,7 +7383,7 @@ // CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7478,13 +7478,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7562,13 +7562,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7663,13 +7663,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7743,13 +7743,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7823,13 +7823,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7903,13 +7903,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -7983,13 +7983,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8064,13 +8064,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8148,13 +8148,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8249,13 +8249,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8329,13 +8329,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8409,13 +8409,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8489,13 +8489,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8569,13 +8569,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8662,13 +8662,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8738,13 +8738,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8831,13 +8831,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8903,13 +8903,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -8975,13 +8975,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -9047,13 +9047,13 @@ // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -9123,11 +9123,11 @@ // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -9136,7 +9136,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -9525,7 +9525,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -9533,7 +9533,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -9712,7 +9712,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -9720,7 +9720,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -9889,7 +9889,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -9897,7 +9897,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -10074,7 +10074,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -10082,7 +10082,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -10259,7 +10259,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -10267,7 +10267,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -10444,7 +10444,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -10452,7 +10452,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -10632,7 +10632,7 @@ // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -10643,7 +10643,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -10830,7 +10830,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -10838,7 +10838,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -11003,7 +11003,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -11011,7 +11011,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -11166,7 +11166,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -11174,7 +11174,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -11337,7 +11337,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -11345,7 +11345,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -11508,7 +11508,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -11516,7 +11516,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -11679,7 +11679,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -11687,7 +11687,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -11850,7 +11850,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -11858,7 +11858,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -12028,7 +12028,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -12036,7 +12036,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -12217,7 +12217,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -12225,7 +12225,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -12398,7 +12398,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -12406,7 +12406,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -12583,7 +12583,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -12591,7 +12591,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -12768,7 +12768,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -12776,7 +12776,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -12953,7 +12953,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -12961,7 +12961,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -13138,7 +13138,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -13146,7 +13146,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -13301,7 +13301,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -13309,7 +13309,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -13474,7 +13474,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -13482,7 +13482,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -13637,7 +13637,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -13645,7 +13645,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -13808,7 +13808,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -13816,7 +13816,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -13979,7 +13979,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -13987,7 +13987,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -14150,7 +14150,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -14158,7 +14158,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -14321,7 +14321,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -14329,7 +14329,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -14484,7 +14484,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -14492,7 +14492,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -14657,7 +14657,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -14665,7 +14665,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -14820,7 +14820,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -14828,7 +14828,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -14991,7 +14991,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -14999,7 +14999,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15162,7 +15162,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -15170,7 +15170,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15333,7 +15333,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -15341,7 +15341,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15505,7 +15505,7 @@ // CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 // CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -15514,7 +15514,7 @@ // CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15601,13 +15601,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15677,13 +15677,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15770,13 +15770,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15842,13 +15842,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15914,13 +15914,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -15986,13 +15986,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16060,7 +16060,7 @@ // CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 // CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -16069,7 +16069,7 @@ // CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16164,13 +16164,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16248,13 +16248,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16349,13 +16349,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16429,13 +16429,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16509,13 +16509,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16589,13 +16589,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16669,13 +16669,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16750,13 +16750,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16834,13 +16834,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -16935,13 +16935,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17015,13 +17015,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17095,13 +17095,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17175,13 +17175,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17255,13 +17255,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17348,13 +17348,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17424,13 +17424,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17517,13 +17517,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17589,13 +17589,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17661,13 +17661,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17733,13 +17733,13 @@ // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -17809,11 +17809,11 @@ // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-EX-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -17822,7 +17822,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -18211,7 +18211,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -18219,7 +18219,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -18398,7 +18398,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -18406,7 +18406,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -18575,7 +18575,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -18583,7 +18583,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -18760,7 +18760,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -18768,7 +18768,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -18945,7 +18945,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -18953,7 +18953,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -19130,7 +19130,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -19138,7 +19138,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -19318,7 +19318,7 @@ // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -19329,7 +19329,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -19516,7 +19516,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -19524,7 +19524,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -19689,7 +19689,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -19697,7 +19697,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -19852,7 +19852,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -19860,7 +19860,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -20023,7 +20023,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -20031,7 +20031,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -20194,7 +20194,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -20202,7 +20202,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -20365,7 +20365,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -20373,7 +20373,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -20536,7 +20536,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -20544,7 +20544,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -20714,7 +20714,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -20722,7 +20722,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -20903,7 +20903,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -20911,7 +20911,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -21084,7 +21084,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -21092,7 +21092,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -21269,7 +21269,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -21277,7 +21277,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -21454,7 +21454,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -21462,7 +21462,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -21639,7 +21639,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -21647,7 +21647,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -21824,7 +21824,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -21832,7 +21832,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -21987,7 +21987,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -21995,7 +21995,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -22160,7 +22160,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -22168,7 +22168,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -22323,7 +22323,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -22331,7 +22331,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -22494,7 +22494,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -22502,7 +22502,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -22665,7 +22665,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -22673,7 +22673,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -22836,7 +22836,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -22844,7 +22844,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -23007,7 +23007,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -23015,7 +23015,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -23170,7 +23170,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -23178,7 +23178,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -23343,7 +23343,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -23351,7 +23351,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -23506,7 +23506,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -23514,7 +23514,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -23677,7 +23677,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -23685,7 +23685,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -23848,7 +23848,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -23856,7 +23856,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24019,7 +24019,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -24027,7 +24027,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24191,7 +24191,7 @@ // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 // CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -24200,7 +24200,7 @@ // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24287,13 +24287,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24363,13 +24363,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24456,13 +24456,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24528,13 +24528,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24600,13 +24600,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24672,13 +24672,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24746,7 +24746,7 @@ // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 // CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -24755,7 +24755,7 @@ // CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24850,13 +24850,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -24934,13 +24934,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25035,13 +25035,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25115,13 +25115,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25195,13 +25195,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25275,13 +25275,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25355,13 +25355,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25436,13 +25436,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25520,13 +25520,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25621,13 +25621,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25701,13 +25701,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25781,13 +25781,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25861,13 +25861,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -25941,13 +25941,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26034,13 +26034,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26110,13 +26110,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26203,13 +26203,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26275,13 +26275,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26347,13 +26347,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -26419,13 +26419,13 @@ // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -36,13 +36,13 @@ // CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [2 x ptr], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_kernel_environment) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: // CHECK-NEXT: [[A:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK-NEXT: [[B:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-NEXT: store i32 10, ptr [[A]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 @@ -56,7 +56,7 @@ // CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 2) // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[B]], i64 4) // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4) -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK-NEXT: call void @__kmpc_target_deinit() // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -44,18 +44,18 @@ // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_kernel_environment) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: -// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4 // CHECK4-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]] -// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK4-NEXT: call void @__kmpc_target_deinit() // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -360,18 +360,18 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_kernel_environment) // CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK5: user_code.entry: -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]] -// CHECK5-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK5-NEXT: call void @__kmpc_target_deinit() // CHECK5-NEXT: ret void // CHECK5: worker.exit: // CHECK5-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_lambda_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_capturing.cpp --- a/clang/test/OpenMP/nvptx_lambda_capturing.cpp +++ b/clang/test/OpenMP/nvptx_lambda_capturing.cpp @@ -834,7 +834,7 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -846,7 +846,7 @@ // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 // CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR7:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -877,18 +877,18 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -953,7 +953,7 @@ // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 // CHECK2-NEXT: store ptr [[TMP3]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -983,7 +983,7 @@ // CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 // CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR7]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1016,7 +1016,7 @@ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 // CHECK2-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1038,7 +1038,7 @@ // CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 // CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP16]], align 8 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1124,7 +1124,7 @@ // CHECK2-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1133,7 +1133,7 @@ // CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1193,7 +1193,7 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 // CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -1223,7 +1223,7 @@ // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 // CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 // CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR7:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1256,11 +1256,11 @@ // CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 // CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 @@ -1278,7 +1278,7 @@ // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 // CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP16]], align 8 // CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6) -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1368,7 +1368,7 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -1380,7 +1380,7 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 // CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR7]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1411,7 +1411,7 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -1422,7 +1422,7 @@ // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8 // CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1465,7 +1465,7 @@ // CHECK3-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -1474,7 +1474,7 @@ // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp @@ -29,13 +29,13 @@ // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -64,12 +64,12 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 // CHECK1-SAME: () #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: call void @_Z3usev() #[[ATTR8]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -105,13 +105,13 @@ // CHECK2-SAME: () #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -140,12 +140,12 @@ // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 // CHECK2-SAME: () #[[ATTR5:[0-9]+]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: call void @_Z3usev() #[[ATTR8]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp @@ -40,16 +40,16 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -150,16 +150,16 @@ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -81,18 +81,18 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i64 0) // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -192,7 +192,7 @@ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -213,7 +213,7 @@ // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -255,7 +255,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -270,7 +270,7 @@ // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[A1]], align 4 // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i64 4) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -344,18 +344,18 @@ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 4 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0) // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i32 0) // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i32 0) // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -455,7 +455,7 @@ // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -476,7 +476,7 @@ // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -518,7 +518,7 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -533,7 +533,7 @@ // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[A1]], align 4 // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -41,12 +41,12 @@ // CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_kernel_environment) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: // CHECK-NEXT: [[D:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[D]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 @@ -59,7 +59,7 @@ // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[D]], i64 4) -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK-NEXT: call void @__kmpc_target_deinit() // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -153,17 +153,17 @@ // CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8 // CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -192,11 +192,11 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -207,7 +207,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -221,7 +221,7 @@ // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 // CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -256,7 +256,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -300,7 +300,7 @@ // CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8 // CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 // CHECK1-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8 -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -330,7 +330,7 @@ // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -351,7 +351,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -374,7 +374,7 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -397,7 +397,7 @@ // CHECK1-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 // CHECK1-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -426,7 +426,7 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -435,7 +435,7 @@ // CHECK1: worker.exit: // CHECK1-NEXT: ret void // CHECK1: 1: -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // // @@ -449,7 +449,7 @@ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -465,7 +465,7 @@ // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -523,17 +523,17 @@ // CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4 // CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -562,11 +562,11 @@ // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 // CHECK2-SAME: () #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -577,7 +577,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -591,7 +591,7 @@ // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 // CHECK2-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -626,7 +626,7 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -670,7 +670,7 @@ // CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8 // CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 // CHECK2-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8 -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -700,7 +700,7 @@ // CHECK2-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -721,7 +721,7 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK2-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -744,7 +744,7 @@ // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -767,7 +767,7 @@ // CHECK2-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 // CHECK2-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 // CHECK2-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -796,7 +796,7 @@ // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 // CHECK2-SAME: () #[[ATTR4]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -805,7 +805,7 @@ // CHECK2: worker.exit: // CHECK2-NEXT: ret void // CHECK2: 1: -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // // @@ -819,7 +819,7 @@ // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -835,7 +835,7 @@ // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp @@ -58,15 +58,15 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -103,7 +103,7 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -115,7 +115,7 @@ // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -159,15 +159,15 @@ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -204,7 +204,7 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -216,7 +216,7 @@ // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -53,15 +53,15 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -100,7 +100,7 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -113,7 +113,7 @@ // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -157,15 +157,15 @@ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -204,7 +204,7 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -217,7 +217,7 @@ // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp @@ -52,141 +52,17 @@ } #endif -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 -// CHECK-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) -// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK: user_code.entry: -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) -// CHECK-NEXT: ret void -// CHECK: worker.exit: -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31 -// CHECK-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 -// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) -// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK: user_code.entry: -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr -// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 -// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) -// CHECK-NEXT: ret void -// CHECK: worker.exit: -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36 -// CHECK-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 -// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) -// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK: user_code.entry: -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr -// CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 -// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr -// CHECK-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 -// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) -// CHECK-NEXT: ret void -// CHECK: worker.exit: -// CHECK-NEXT: ret void -// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 -// CHECK-NEXT: ret void // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 // CHECK45-64-SAME: () #[[ATTR0:[0-9]+]] { // CHECK45-64-NEXT: entry: // CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit() // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -209,7 +85,7 @@ // CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -221,7 +97,7 @@ // CHECK45-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr // CHECK45-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 // CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit() // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -257,7 +133,7 @@ // CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -277,7 +153,7 @@ // CHECK45-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK45-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 // CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit() // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -316,13 +192,13 @@ // CHECK45-32-SAME: () #[[ATTR0:[0-9]+]] { // CHECK45-32-NEXT: entry: // CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -345,7 +221,7 @@ // CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -357,7 +233,7 @@ // CHECK45-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr // CHECK45-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 // CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -393,7 +269,7 @@ // CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -413,7 +289,7 @@ // CHECK45-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK45-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 // CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -452,13 +328,13 @@ // CHECK45-32-EX-SAME: () #[[ATTR0:[0-9]+]] { // CHECK45-32-EX-NEXT: entry: // CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -481,7 +357,7 @@ // CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -493,7 +369,7 @@ // CHECK45-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr // CHECK45-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 // CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -529,7 +405,7 @@ // CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -549,7 +425,7 @@ // CHECK45-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK45-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 // CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -588,13 +464,13 @@ // CHECK-64-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -617,7 +493,7 @@ // CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -629,7 +505,7 @@ // CHECK-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr // CHECK-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -665,7 +541,7 @@ // CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -685,7 +561,7 @@ // CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -724,13 +600,13 @@ // CHECK-32-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -753,7 +629,7 @@ // CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -765,7 +641,7 @@ // CHECK-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr // CHECK-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -801,7 +677,7 @@ // CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -821,7 +697,7 @@ // CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -860,13 +736,13 @@ // CHECK-32-EX-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -889,7 +765,7 @@ // CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -901,7 +777,7 @@ // CHECK-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr // CHECK-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -937,7 +813,7 @@ // CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -957,7 +833,7 @@ // CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -104,15 +104,15 @@ // CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK-64-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -286,7 +286,7 @@ // CHECK-64-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -296,7 +296,7 @@ // CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK-64-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -523,7 +523,7 @@ // CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -533,7 +533,7 @@ // CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK-64-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 // CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -774,15 +774,15 @@ // CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK-32-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -956,7 +956,7 @@ // CHECK-32-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 // CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -966,7 +966,7 @@ // CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK-32-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -1193,7 +1193,7 @@ // CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -1203,7 +1203,7 @@ // CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK-32-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 // CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -1444,15 +1444,15 @@ // CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK-32-EX-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -1626,7 +1626,7 @@ // CHECK-32-EX-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -1636,7 +1636,7 @@ // CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK-32-EX-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -1863,7 +1863,7 @@ // CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -1873,7 +1873,7 @@ // CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK-32-EX-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 // CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -35,15 +35,15 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -527,7 +527,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -535,7 +535,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_printf_codegen.c b/clang/test/OpenMP/nvptx_target_printf_codegen.c --- a/clang/test/OpenMP/nvptx_target_printf_codegen.c +++ b/clang/test/OpenMP/nvptx_target_printf_codegen.c @@ -45,7 +45,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[FMT:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckSimple_l13_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -58,7 +58,7 @@ // CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2 // CHECK-64-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8 // CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -67,12 +67,12 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -84,7 +84,7 @@ // CHECK-64-NEXT: [[FOO_ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8 // CHECK-64-NEXT: store i64 [[FOO]], ptr [[FOO_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckAllocaIsInEntryBlock_l36_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -99,7 +99,7 @@ // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // CHECK-64: if.end: -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // // @@ -108,7 +108,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[FMT:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckSimple_l13_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -121,7 +121,7 @@ // CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2 // CHECK-32-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8 // CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -130,12 +130,12 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -147,7 +147,7 @@ // CHECK-32-NEXT: [[FOO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8 // CHECK-32-NEXT: store i32 [[FOO]], ptr [[FOO_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckAllocaIsInEntryBlock_l36_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -162,6 +162,6 @@ // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // CHECK-32: if.end: -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp @@ -72,7 +72,7 @@ // CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -124,7 +124,7 @@ // CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 // CHECK45-64-NEXT: br label [[SIMD_IF_END]] // CHECK45-64: simd.if.end: -// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit() // CHECK45-64-NEXT: ret void // // @@ -142,7 +142,7 @@ // CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -198,7 +198,7 @@ // CHECK45-64-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 // CHECK45-64-NEXT: br label [[SIMD_IF_END]] // CHECK45-64: simd.if.end: -// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit() // CHECK45-64-NEXT: ret void // // @@ -211,7 +211,7 @@ // CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -244,7 +244,7 @@ // CHECK45-64-NEXT: ret void // CHECK45-64: omp.inner.for.end: // CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit() // CHECK45-64-NEXT: ret void // // @@ -261,7 +261,7 @@ // CHECK45-64-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -299,7 +299,7 @@ // CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 // CHECK45-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK45-64-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 -// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit() // CHECK45-64-NEXT: ret void // // @@ -317,7 +317,7 @@ // CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -368,7 +368,7 @@ // CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 // CHECK45-32-NEXT: br label [[SIMD_IF_END]] // CHECK45-32: simd.if.end: -// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-NEXT: ret void // // @@ -386,7 +386,7 @@ // CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -441,7 +441,7 @@ // CHECK45-32-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 // CHECK45-32-NEXT: br label [[SIMD_IF_END]] // CHECK45-32: simd.if.end: -// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-NEXT: ret void // // @@ -454,7 +454,7 @@ // CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -486,7 +486,7 @@ // CHECK45-32-NEXT: ret void // CHECK45-32: omp.inner.for.end: // CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-NEXT: ret void // // @@ -503,7 +503,7 @@ // CHECK45-32-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -540,7 +540,7 @@ // CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 // CHECK45-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK45-32-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 -// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-NEXT: ret void // // @@ -558,7 +558,7 @@ // CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -609,7 +609,7 @@ // CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 // CHECK45-32-EX-NEXT: br label [[SIMD_IF_END]] // CHECK45-32-EX: simd.if.end: -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-EX-NEXT: ret void // // @@ -627,7 +627,7 @@ // CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -682,7 +682,7 @@ // CHECK45-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 // CHECK45-32-EX-NEXT: br label [[SIMD_IF_END]] // CHECK45-32-EX: simd.if.end: -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-EX-NEXT: ret void // // @@ -695,7 +695,7 @@ // CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -727,7 +727,7 @@ // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: omp.inner.for.end: // CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-EX-NEXT: ret void // // @@ -744,7 +744,7 @@ // CHECK45-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -781,7 +781,7 @@ // CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 // CHECK45-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK45-32-EX-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-EX-NEXT: ret void // // @@ -799,7 +799,7 @@ // CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -851,7 +851,7 @@ // CHECK-64-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 // CHECK-64-NEXT: br label [[SIMD_IF_END]] // CHECK-64: simd.if.end: -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // // @@ -869,7 +869,7 @@ // CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -925,7 +925,7 @@ // CHECK-64-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 // CHECK-64-NEXT: br label [[SIMD_IF_END]] // CHECK-64: simd.if.end: -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // // @@ -938,7 +938,7 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -971,7 +971,7 @@ // CHECK-64-NEXT: ret void // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // // @@ -988,7 +988,7 @@ // CHECK-64-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 // CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1026,7 +1026,7 @@ // CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 // CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // // @@ -1044,7 +1044,7 @@ // CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -1095,7 +1095,7 @@ // CHECK-32-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 // CHECK-32-NEXT: br label [[SIMD_IF_END]] // CHECK-32: simd.if.end: -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // // @@ -1113,7 +1113,7 @@ // CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -1168,7 +1168,7 @@ // CHECK-32-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 // CHECK-32-NEXT: br label [[SIMD_IF_END]] // CHECK-32: simd.if.end: -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // // @@ -1181,7 +1181,7 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -1213,7 +1213,7 @@ // CHECK-32-NEXT: ret void // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // // @@ -1230,7 +1230,7 @@ // CHECK-32-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 // CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -1267,7 +1267,7 @@ // CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 // CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // // @@ -1285,7 +1285,7 @@ // CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -1336,7 +1336,7 @@ // CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4 // CHECK-32-EX-NEXT: br label [[SIMD_IF_END]] // CHECK-32-EX: simd.if.end: -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // // @@ -1354,7 +1354,7 @@ // CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -1409,7 +1409,7 @@ // CHECK-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4 // CHECK-32-EX-NEXT: br label [[SIMD_IF_END]] // CHECK-32-EX: simd.if.end: -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // // @@ -1422,7 +1422,7 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -1454,7 +1454,7 @@ // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // // @@ -1471,7 +1471,7 @@ // CHECK-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -1508,6 +1508,6 @@ // CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4 // CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -57,18 +57,18 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1 // CHECK1-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -95,7 +95,7 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -106,7 +106,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -133,7 +133,7 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -144,7 +144,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -209,18 +209,18 @@ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1 // CHECK2-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -247,7 +247,7 @@ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -258,7 +258,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -285,7 +285,7 @@ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -296,7 +296,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -37,15 +37,15 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -155,15 +155,15 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -85,11 +85,11 @@ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -99,7 +99,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -396,7 +396,7 @@ // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -407,7 +407,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -645,7 +645,7 @@ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -653,7 +653,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -833,7 +833,7 @@ // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -844,7 +844,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1059,7 +1059,7 @@ // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -1070,7 +1070,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1371,7 +1371,7 @@ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -1383,7 +1383,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1635,11 +1635,11 @@ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -1649,7 +1649,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1946,7 +1946,7 @@ // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1957,7 +1957,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2195,7 +2195,7 @@ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -2203,7 +2203,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2383,7 +2383,7 @@ // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -2394,7 +2394,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2609,7 +2609,7 @@ // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -2620,7 +2620,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2916,7 +2916,7 @@ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -2928,7 +2928,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -3180,11 +3180,11 @@ // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -3194,7 +3194,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3484,7 +3484,7 @@ // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -3495,7 +3495,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3727,7 +3727,7 @@ // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -3735,7 +3735,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3909,7 +3909,7 @@ // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -3920,7 +3920,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -4128,7 +4128,7 @@ // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -4139,7 +4139,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -4443,7 +4443,7 @@ // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -4455,7 +4455,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -43,11 +43,11 @@ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 @@ -57,7 +57,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -341,11 +341,11 @@ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 @@ -355,7 +355,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -73,11 +73,11 @@ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -87,7 +87,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -408,7 +408,7 @@ // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -419,7 +419,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -681,7 +681,7 @@ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -689,7 +689,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -883,7 +883,7 @@ // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -894,7 +894,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1128,11 +1128,11 @@ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -1142,7 +1142,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1456,7 +1456,7 @@ // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1467,7 +1467,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1723,7 +1723,7 @@ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1731,7 +1731,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1919,7 +1919,7 @@ // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1930,7 +1930,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp @@ -81,11 +81,11 @@ // CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: -// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -95,7 +95,7 @@ // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit() // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -246,7 +246,7 @@ // CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -257,7 +257,7 @@ // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit() // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -396,7 +396,7 @@ // CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -404,7 +404,7 @@ // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit() // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -515,7 +515,7 @@ // CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK45-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: @@ -526,7 +526,7 @@ // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] -// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-64-NEXT: call void @__kmpc_target_deinit() // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: // CHECK45-64-NEXT: ret void @@ -664,11 +664,11 @@ // CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: -// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -678,7 +678,7 @@ // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -828,7 +828,7 @@ // CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -839,7 +839,7 @@ // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -977,7 +977,7 @@ // CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -985,7 +985,7 @@ // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -1095,7 +1095,7 @@ // CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: @@ -1106,7 +1106,7 @@ // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: // CHECK45-32-NEXT: ret void @@ -1242,11 +1242,11 @@ // CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: -// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -1256,7 +1256,7 @@ // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -1406,7 +1406,7 @@ // CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -1417,7 +1417,7 @@ // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -1555,7 +1555,7 @@ // CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -1563,7 +1563,7 @@ // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -1673,7 +1673,7 @@ // CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: @@ -1684,7 +1684,7 @@ // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: // CHECK45-32-EX-NEXT: ret void @@ -1820,11 +1820,11 @@ // CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -1834,7 +1834,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -1985,7 +1985,7 @@ // CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -1996,7 +1996,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2135,7 +2135,7 @@ // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2143,7 +2143,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2254,7 +2254,7 @@ // CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -2265,7 +2265,7 @@ // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-64-NEXT: call void @__kmpc_target_deinit() // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -2403,11 +2403,11 @@ // CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -2417,7 +2417,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -2567,7 +2567,7 @@ // CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -2578,7 +2578,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -2716,7 +2716,7 @@ // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -2724,7 +2724,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -2834,7 +2834,7 @@ // CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -2845,7 +2845,7 @@ // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-NEXT: call void @__kmpc_target_deinit() // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -2981,11 +2981,11 @@ // CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -2995,7 +2995,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -3145,7 +3145,7 @@ // CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -3156,7 +3156,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -3294,7 +3294,7 @@ // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -3302,7 +3302,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void @@ -3412,7 +3412,7 @@ // CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: @@ -3423,7 +3423,7 @@ // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit() // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: // CHECK-32-EX-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp @@ -76,18 +76,18 @@ // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -325,7 +325,7 @@ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -333,7 +333,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -513,7 +513,7 @@ // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -524,7 +524,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -739,7 +739,7 @@ // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -750,7 +750,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1051,7 +1051,7 @@ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -1063,7 +1063,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1312,18 +1312,18 @@ // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1561,7 +1561,7 @@ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1569,7 +1569,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1749,7 +1749,7 @@ // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1760,7 +1760,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1975,7 +1975,7 @@ // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1986,7 +1986,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2282,7 +2282,7 @@ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -2294,7 +2294,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2543,18 +2543,18 @@ // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -2786,7 +2786,7 @@ // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -2794,7 +2794,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -2968,7 +2968,7 @@ // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -2979,7 +2979,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3187,7 +3187,7 @@ // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -3198,7 +3198,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3502,7 +3502,7 @@ // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -3514,7 +3514,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp @@ -40,18 +40,18 @@ // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -290,18 +290,18 @@ // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -83,19 +83,19 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -122,7 +122,7 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -134,7 +134,7 @@ // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -161,19 +161,19 @@ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -200,7 +200,7 @@ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -212,7 +212,7 @@ // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -243,19 +243,19 @@ // CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK3-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4) -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -286,7 +286,7 @@ // CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -298,7 +298,7 @@ // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8) -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -329,19 +329,19 @@ // CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK4-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK4-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) -// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK4-NEXT: call void @__kmpc_target_deinit() // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -372,7 +372,7 @@ // CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_kernel_environment) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -384,7 +384,7 @@ // CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) -// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK4-NEXT: call void @__kmpc_target_deinit() // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -57,19 +57,19 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[E]], ptr [[E_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[E_ADDR]], align 8 // CHECK1-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: store double [[TMP1]], ptr [[E1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i64 8) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -328,7 +328,7 @@ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -344,7 +344,7 @@ // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i64 4) // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -677,7 +677,7 @@ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -685,7 +685,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit() // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1250,17 +1250,17 @@ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8 // CHECK2-NEXT: store double [[TMP3]], ptr [[E1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1519,7 +1519,7 @@ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1535,7 +1535,7 @@ // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]] // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1868,7 +1868,7 @@ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1876,7 +1876,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit() // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2441,17 +2441,17 @@ // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8 // CHECK3-NEXT: store double [[TMP3]], ptr [[E1]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -2710,7 +2710,7 @@ // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -2726,7 +2726,7 @@ // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]] // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3059,7 +3059,7 @@ // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -3067,7 +3067,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit() // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp --- a/clang/test/OpenMP/reduction_implicit_map.cpp +++ b/clang/test/OpenMP/reduction_implicit_map.cpp @@ -103,16 +103,16 @@ // CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_kernel_environment) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8 // CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK-NEXT: call void @__kmpc_target_deinit() // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c --- a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c +++ b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c @@ -84,9 +84,9 @@ } #pragma omp begin declare target device_type(nohost) -__attribute__((weak)) -extern "C" int __kmpc_target_init(void *Ident, char Mode, - bool UseGenericStateMachine) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}} +struct KernelEnvironmentTy; +__attribute__((weak)) +extern "C" int __kmpc_target_init(struct KernelEnvironmentTy *) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}} return 0; } #pragma omp end declare target diff --git a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c --- a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c +++ b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c @@ -40,9 +40,9 @@ } #pragma omp begin declare target device_type(nohost) -__attribute__((weak)) -extern "C" int __kmpc_target_init(void *Ident, char Mode, - bool UseGenericStateMachine) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}} +struct KernelEnvironmentTy; +__attribute__((weak)) +extern "C" int __kmpc_target_init(struct KernelEnvironmentTy *) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}} return 0; } #pragma omp end declare target diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp --- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -95,7 +95,7 @@ // CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG47]] // CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG47]] // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG47]] -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG47]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment), !dbg [[DBG47]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG47]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG47]] // CHECK1: user_code.entry: @@ -113,7 +113,7 @@ // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]] // CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG48]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG48]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2), !dbg [[DBG49:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG49:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG51:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG47]] @@ -308,11 +308,11 @@ // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG137]] // CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG137]] // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false), !dbg [[DBG137]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_kernel_environment), !dbg [[DBG137]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG137]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG137]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB9:[0-9]+]]) +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7:[0-9]+]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG138:![0-9]+]] // CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG138]] // CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG138]] @@ -325,8 +325,8 @@ // CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG138]] // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]] // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2), !dbg [[DBG139:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB7]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG139:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG141:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG137]] @@ -517,11 +517,11 @@ // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG212]] // CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG212]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false), !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_kernel_environment), !dbg [[DBG212]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]) +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB11:[0-9]+]]) // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]] // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]] @@ -530,8 +530,8 @@ // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]] // CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG214:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB11]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG214:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG212]] diff --git a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp @@ -89,7 +89,7 @@ // CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG41]] // CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG41]] // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG41]] -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG41]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment), !dbg [[DBG41]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]] // CHECK1: user_code.entry: @@ -110,7 +110,7 @@ // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG43]] // CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG45:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG45:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG41]] @@ -383,11 +383,11 @@ // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]] // CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]] // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG146]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment), !dbg [[DBG146]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]) +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]] // CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG147]] // CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]] @@ -400,8 +400,8 @@ // CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]] // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]] // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG148:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB13]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG148:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG146]] @@ -466,7 +466,7 @@ // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG165]] // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG165]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] @@ -559,7 +559,7 @@ // CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG165]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP203:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG204:![0-9]+]] // // @@ -665,11 +665,11 @@ // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]] // CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment), !dbg [[DBG236]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]]) +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]) // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]] // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]] // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]] @@ -678,8 +678,8 @@ // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG237]] // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]] // CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG238:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG238:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG236]] @@ -749,7 +749,7 @@ // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG255]] // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB17:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG255]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] @@ -846,7 +846,7 @@ // CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG255]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP294:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB19:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG295:![0-9]+]] // // diff --git a/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp --- a/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp +++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp @@ -89,7 +89,7 @@ // CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG41]] // CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG41]] // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG41]] -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG41]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment), !dbg [[DBG41]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]] // CHECK1: user_code.entry: @@ -110,7 +110,7 @@ // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG43]] // CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG45:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG45:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG41]] @@ -383,11 +383,11 @@ // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]] // CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]] // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG146]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment), !dbg [[DBG146]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]) +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]] // CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG147]] // CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]] @@ -400,8 +400,8 @@ // CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]] // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]] // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG148:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB13]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG148:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG146]] @@ -466,7 +466,7 @@ // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG165]] // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG165]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] @@ -559,7 +559,7 @@ // CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG165]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP203:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG204:![0-9]+]] // // @@ -665,11 +665,11 @@ // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]] // CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment), !dbg [[DBG236]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]]) +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]) // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]] // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]] // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]] @@ -678,8 +678,8 @@ // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG237]] // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]] // CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG238:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG238:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG236]] @@ -749,7 +749,7 @@ // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG255]] // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB17:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG255]] // CHECK1: omp.dispatch.cond: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] @@ -846,7 +846,7 @@ // CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG255]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP294:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB19:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG295:![0-9]+]] // // diff --git a/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp --- a/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp @@ -53,18 +53,18 @@ // IR-GPU-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8 // IR-GPU-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8 // IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8 -// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false) +// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_kernel_environment to ptr)) // IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // IR-GPU: user_code.entry: -// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) // IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8 // IR-GPU-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 // IR-GPU-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 // IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 // IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP5]], align 8 // IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 64, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2) -// IR-GPU-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) +// IR-GPU-NEXT: call void @__kmpc_target_deinit() // IR-GPU-NEXT: ret void // IR-GPU: worker.exit: // IR-GPU-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp --- a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp @@ -1158,18 +1158,18 @@ // IR-GPU-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 // IR-GPU-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 // IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 -// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false) +// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_kernel_environment to ptr)) // IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // IR-GPU: user_code.entry: -// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) // IR-GPU-NEXT: [[TMP3:%.*]] = load i32, ptr [[J_ADDR_ASCAST]], align 4 // IR-GPU-NEXT: store i32 [[TMP3]], ptr [[J_CASTED_ASCAST]], align 4 // IR-GPU-NEXT: [[TMP4:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 // IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 // IR-GPU-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 // IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] -// IR-GPU-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) +// IR-GPU-NEXT: call void @__kmpc_target_deinit() // IR-GPU-NEXT: ret void // IR-GPU: worker.exit: // IR-GPU-NEXT: ret void diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1997,8 +1997,7 @@ /// Create a runtime call for kmpc_target_deinit /// /// \param Loc The insert and source location description. - /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. - void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD); + void createTargetDeinit(const LocationDescription &Loc); ///} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -96,6 +96,11 @@ Int64, Int64, Int32Arr3Ty, Int32Arr3Ty, Int32) __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr) __OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8) +__OMP_STRUCT_TYPE(ConfigurationEnvironment, ConfigurationEnvironmentTy, false, + Int8, Int8, Int8) +__OMP_STRUCT_TYPE(DynamicEnvironment, DynamicEnvironmentTy, false, Int16) +__OMP_STRUCT_TYPE(KernelEnvironment, KernelEnvironmentTy, false, + ConfigurationEnvironment, IdentPtr, DynamicEnvironmentPtr) #undef __OMP_STRUCT_TYPE #undef OMP_STRUCT_TYPE @@ -452,8 +457,8 @@ /* Int */ Int32, /* kmp_task_t */ VoidPtr) /// OpenMP Device runtime functions -__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1) -__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8) +__OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentPtr) +__OMP_RTL(__kmpc_target_deinit, false, Void,) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) @@ -1020,9 +1025,9 @@ ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs, SExt)) __OMP_RTL_ATTRS(__kmpc_target_init, AttributeSet(), SExt, - ParamAttrs(AttributeSet(), SExt, SExt)) + ParamAttrs(AttributeSet())) __OMP_RTL_ATTRS(__kmpc_target_deinit, AttributeSet(), AttributeSet(), - ParamAttrs(AttributeSet(), SExt)) + ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(), ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt, AttributeSet(), AttributeSet(), AttributeSet(), diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -24,6 +24,7 @@ #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" @@ -4016,14 +4017,60 @@ ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); - ConstantInt *UseGenericStateMachine = - ConstantInt::getBool(Int32->getContext(), !IsSPMD); + ConstantInt *UseGenericStateMachineVal = ConstantInt::getSigned( + IntegerType::getInt8Ty(Int8->getContext()), !IsSPMD); + ConstantInt *MayUseNestedParallelismVal = + ConstantInt::getSigned(IntegerType::getInt8Ty(Int8->getContext()), true); + ConstantInt *DebugIndentionLevelVal = + ConstantInt::getSigned(IntegerType::getInt16Ty(Int8->getContext()), 0); + + // We need to strip the debug prefix to get the correct kernel name. + Function *Kernel = Builder.GetInsertBlock()->getParent(); + StringRef KernelName = Kernel->getName(); + const std::string DebugPrefix = "_debug__"; + if (KernelName.ends_with(DebugPrefix)) + KernelName = KernelName.drop_back(DebugPrefix.length()); Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_init); - - CallInst *ThreadKind = Builder.CreateCall( - Fn, {Ident, IsSPMDVal, UseGenericStateMachine}); + const DataLayout &DL = Fn->getParent()->getDataLayout(); + + Twine DynamicEnvironmentName = KernelName + "_dynamic_environment"; + Constant *DynamicEnvironmentInitializer = + ConstantStruct::get(DynamicEnvironment, {DebugIndentionLevelVal}); + Constant *DynamicEnvironmentGV = new GlobalVariable( + M, DynamicEnvironment, /* IsConstant */ false, + GlobalValue::InternalLinkage, DynamicEnvironmentInitializer, + DynamicEnvironmentName, + /* InsertBefore */ nullptr, llvm::GlobalValue::NotThreadLocal, + DL.getDefaultGlobalsAddressSpace()); + if (DynamicEnvironmentGV->getType() != DynamicEnvironmentPtr) + DynamicEnvironmentGV = ConstantExpr::getAddrSpaceCast( + DynamicEnvironmentGV, DynamicEnvironmentPtr); + + Constant *ConfigurationEnvironmentInitializer = ConstantStruct::get( + ConfigurationEnvironment, { + UseGenericStateMachineVal, + MayUseNestedParallelismVal, + IsSPMDVal, + }); + Constant *KernelEnvironmentInitializer = ConstantStruct::get( + KernelEnvironment, { + ConfigurationEnvironmentInitializer, + Ident, + DynamicEnvironmentGV, + }); + Twine KernelEnvironmentName = KernelName + "_kernel_environment"; + Constant *KernelEnvironmentGV = new GlobalVariable( + M, KernelEnvironment, /* IsConstant */ true, GlobalValue::ExternalLinkage, + KernelEnvironmentInitializer, KernelEnvironmentName, + /* InsertBefore */ nullptr, llvm::GlobalValue::NotThreadLocal, + DL.getDefaultGlobalsAddressSpace()); + if (KernelEnvironmentGV->getType() != KernelEnvironmentPtr) + KernelEnvironmentGV = ConstantExpr::getAddrSpaceCast(KernelEnvironmentGV, + KernelEnvironmentPtr); + + CallInst *ThreadKind = Builder.CreateCall(Fn, {KernelEnvironmentGV}); Value *ExecUserCode = Builder.CreateICmpEQ( ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), @@ -4056,22 +4103,14 @@ return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt()); } -void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, - bool IsSPMD) { +void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc) { if (!updateToLocation(Loc)) return; - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); - ConstantInt *IsSPMDVal = ConstantInt::getSigned( - IntegerType::getInt8Ty(Int8->getContext()), - IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); - Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_deinit); - Builder.CreateCall(Fn, {Ident, IsSPMDVal}); + Builder.CreateCall(Fn, {}); } void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes( @@ -4321,7 +4360,7 @@ // Insert target deinit call in the device compilation pass. if (OMPBuilder.Config.isTargetDevice()) - OMPBuilder.createTargetDeinit(Builder, /*IsSPMD*/ false); + OMPBuilder.createTargetDeinit(Builder); // Insert return instruction. Builder.CreateRetVoid(); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -33,6 +33,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Assumptions.h" #include "llvm/IR/BasicBlock.h" @@ -181,6 +182,80 @@ static constexpr auto TAG = "[" DEBUG_TYPE "]"; #endif +namespace KernelInfo { + +// struct ConfigurationEnvironmentTy { +// uint8_t UseGenericStateMachine; +// uint8_t MayUseNestedParallelism; +// llvm::omp::OMPTgtExecModeFlags ExecMode; +// }; + +// struct DynamicEnvironmentTy { +// uint16_t DebugIndentionLevel; +// }; + +// struct KernelEnvironmentTy { +// ConfigurationEnvironmentTy Configuration; +// IdentTy *Ident; +// DynamicEnvironmentTy *DynamicEnv; +// }; + +#define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX) \ + constexpr const unsigned MEMBER##Idx = IDX; + +KERNEL_ENVIRONMENT_IDX(Configuration, 0) +KERNEL_ENVIRONMENT_IDX(Ident, 1) + +#undef KERNEL_ENVIRONMENT_IDX + +#define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX) \ + constexpr const unsigned MEMBER##Idx = IDX; + +KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0) +KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1) +KERNEL_ENVIRONMENT_CONFIGURATION_IDX(ExecMode, 2) + +#undef KERNEL_ENVIRONMENT_CONFIGURATION_IDX + +#define KERNEL_ENVIRONMENT_GETTER(MEMBER, RETURNTYPE) \ + RETURNTYPE *get##MEMBER##FromKernelEnvironment(ConstantStruct *KernelEnvC) { \ + return cast(KernelEnvC->getAggregateElement(MEMBER##Idx)); \ + } + +KERNEL_ENVIRONMENT_GETTER(Ident, Constant) +KERNEL_ENVIRONMENT_GETTER(Configuration, ConstantStruct) + +#undef KERNEL_ENVIRONMENT_GETTER + +#define KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MEMBER) \ + ConstantInt *get##MEMBER##FromKernelEnvironment( \ + ConstantStruct *KernelEnvC) { \ + ConstantStruct *ConfigC = \ + getConfigurationFromKernelEnvironment(KernelEnvC); \ + return dyn_cast(ConfigC->getAggregateElement(MEMBER##Idx)); \ + } + +KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(UseGenericStateMachine) +KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MayUseNestedParallelism) +KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(ExecMode) + +#undef KERNEL_ENVIRONMENT_CONFIGURATION_GETTER + +GlobalVariable * +getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) { + constexpr const int InitKernelEnvironmentArgNo = 0; + return cast( + KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo) + ->stripPointerCasts()); +} + +ConstantStruct *getKernelEnvironementFromKernelInitCB(CallBase *KernelInitCB) { + GlobalVariable *KernelEnvGV = + getKernelEnvironementGVFromKernelInitCB(KernelInitCB); + return cast(KernelEnvGV->getInitializer()); +} +} // namespace KernelInfo + namespace { struct AAHeapToShared; @@ -610,6 +685,10 @@ /// one we abort as the kernel is malformed. CallBase *KernelInitCB = nullptr; + /// The constant kernel environement as taken from and passed to + /// __kmpc_target_init. + ConstantStruct *KernelEnvC = nullptr; + /// The __kmpc_target_deinit call in this kernel, if any. If we find more than /// one we abort as the kernel is malformed. CallBase *KernelDeinitCB = nullptr; @@ -714,6 +793,12 @@ "assumptions."); KernelDeinitCB = KIS.KernelDeinitCB; } + if (KIS.KernelEnvC) { + if (KernelEnvC && KernelEnvC != KIS.KernelEnvC) + llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt " + "assumptions."); + KernelEnvC = KIS.KernelEnvC; + } SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker; ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions; ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions; @@ -2779,9 +2864,11 @@ CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr; if (!CB) return false; - const int InitModeArgNo = 1; - auto *ModeCI = dyn_cast(CB->getOperand(InitModeArgNo)); - return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC); + ConstantStruct *KernelEnvC = + KernelInfo::getKernelEnvironementFromKernelInitCB(CB); + ConstantInt *ExecModeC = + KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC); + return ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC; } if (C->isZero()) { @@ -3500,6 +3587,29 @@ return GuardedInstructions; } + void setConfigurationOfKernelEnvironment(ConstantStruct *ConfigC) { + Constant *NewKernelEnvC = ConstantFoldInsertValueInstruction( + KernelEnvC, ConfigC, {KernelInfo::ConfigurationIdx}); + assert(NewKernelEnvC && "Failed to create new kernel environment"); + KernelEnvC = cast(NewKernelEnvC); + } + +#define KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MEMBER) \ + void set##MEMBER##OfKernelEnvironment(ConstantInt *NewVal) { \ + ConstantStruct *ConfigC = \ + KernelInfo::getConfigurationFromKernelEnvironment(KernelEnvC); \ + Constant *NewConfigC = ConstantFoldInsertValueInstruction( \ + ConfigC, NewVal, {KernelInfo::MEMBER##Idx}); \ + assert(NewConfigC && "Failed to create new configuration environment"); \ + setConfigurationOfKernelEnvironment(cast(NewConfigC)); \ + } + + KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(UseGenericStateMachine) + KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MayUseNestedParallelism) + KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(ExecMode) + +#undef KERNEL_ENVIRONMENT_CONFIGURATION_SETTER + /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { // This is a high-level transform that might change the constant arguments @@ -3548,61 +3658,52 @@ ReachingKernelEntries.insert(Fn); IsKernelEntry = true; - // For kernels we might need to initialize/finalize the IsSPMD state and - // we need to register a simplification callback so that the Attributor - // knows the constant arguments to __kmpc_target_init and - // __kmpc_target_deinit might actually change. + KernelEnvC = + KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB); + GlobalVariable *KernelEnvGV = + KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB); - Attributor::SimplifictionCallbackTy StateMachineSimplifyCB = - [&](const IRPosition &IRP, const AbstractAttribute *AA, - bool &UsedAssumedInformation) -> std::optional { - return nullptr; - }; - - Attributor::SimplifictionCallbackTy ModeSimplifyCB = - [&](const IRPosition &IRP, const AbstractAttribute *AA, - bool &UsedAssumedInformation) -> std::optional { - // IRP represents the "SPMDCompatibilityTracker" argument of an - // __kmpc_target_init or - // __kmpc_target_deinit call. We will answer this one with the internal - // state. - if (!SPMDCompatibilityTracker.isValidState()) - return nullptr; - if (!SPMDCompatibilityTracker.isAtFixpoint()) { - if (AA) - A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); - UsedAssumedInformation = true; - } else { - UsedAssumedInformation = false; - } - auto *Val = ConstantInt::getSigned( - IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()), - SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD - : OMP_TGT_EXEC_MODE_GENERIC); - return Val; + Attributor::GlobalVariableSimplifictionCallbackTy + KernelConfigurationSimplifyCB = + [&](const GlobalVariable &GV, const AbstractAttribute *AA, + bool &UsedAssumedInformation) -> std::optional { + return KernelEnvC; }; - constexpr const int InitModeArgNo = 1; - constexpr const int DeinitModeArgNo = 1; - constexpr const int InitUseStateMachineArgNo = 2; - A.registerSimplificationCallback( - IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo), - StateMachineSimplifyCB); - A.registerSimplificationCallback( - IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo), - ModeSimplifyCB); - A.registerSimplificationCallback( - IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo), - ModeSimplifyCB); + A.registerGlobalVariableSimplificationCallback( + *KernelEnvGV, KernelConfigurationSimplifyCB); // Check if we know we are in SPMD-mode already. - ConstantInt *ModeArg = - dyn_cast(KernelInitCB->getArgOperand(InitModeArgNo)); - if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) + ConstantInt *ExecModeC = + KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC); + ConstantInt *AssumedExecModeC = ConstantInt::get( + ExecModeC->getType(), + ExecModeC->getSExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD); + if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD) SPMDCompatibilityTracker.indicateOptimisticFixpoint(); - // This is a generic region but SPMDization is disabled so stop tracking. else if (DisableOpenMPOptSPMDization) + // This is a generic region but SPMDization is disabled so stop + // tracking. SPMDCompatibilityTracker.indicatePessimisticFixpoint(); + else + setExecModeOfKernelEnvironment(AssumedExecModeC); + + ConstantInt *MayUseNestedParallelismC = + KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC); + ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get( + MayUseNestedParallelismC->getType(), NestedParallelism); + setMayUseNestedParallelismOfKernelEnvironment( + AssumedMayUseNestedParallelismC); + + if (!DisableOpenMPOptStateMachineRewrite) { + ConstantInt *UseGenericStateMachineC = + KernelInfo::getUseGenericStateMachineFromKernelEnvironment( + KernelEnvC); + ConstantInt *AssumedUseGenericStateMachineC = + ConstantInt::get(UseGenericStateMachineC->getType(), false); + setUseGenericStateMachineOfKernelEnvironment( + AssumedUseGenericStateMachineC); + } // Register virtual uses of functions we might need to preserve. auto RegisterVirtualUse = [&](RuntimeFunction RFKind, @@ -3703,21 +3804,21 @@ if (!KernelInitCB || !KernelDeinitCB) return ChangeStatus::UNCHANGED; - /// Insert nested Parallelism global variable - Function *Kernel = getAnchorScope(); - Module &M = *Kernel->getParent(); - Type *Int8Ty = Type::getInt8Ty(M.getContext()); - new GlobalVariable(M, Int8Ty, /* isConstant */ true, - GlobalValue::WeakAnyLinkage, - ConstantInt::get(Int8Ty, NestedParallelism ? 1 : 0), - Kernel->getName() + "_nested_parallelism"); + ChangeStatus Changed = ChangeStatus::UNCHANGED; // If we can we change the execution mode to SPMD-mode otherwise we build a // custom state machine. - ChangeStatus Changed = ChangeStatus::UNCHANGED; if (!changeToSPMDMode(A, Changed)) { if (!KernelInitCB->getCalledFunction()->isDeclaration()) - return buildCustomStateMachine(A); + Changed |= buildCustomStateMachine(A); + } + + // At last, update the KernelEnvc + GlobalVariable *KernelEnvGV = + KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB); + if (KernelEnvGV->getInitializer() != KernelEnvC) { + KernelEnvGV->setInitializer(KernelEnvC); + Changed = ChangeStatus::CHANGED; } return Changed; @@ -3787,14 +3888,14 @@ // Find escaping outputs from the guarded region to outside users and // broadcast their values to them. for (Instruction &I : *RegionStartBB) { - SmallPtrSet OutsideUsers; - for (User *Usr : I.users()) { - Instruction &UsrI = *cast(Usr); + SmallVector OutsideUses; + for (Use &U : I.uses()) { + Instruction &UsrI = *cast(U.getUser()); if (UsrI.getParent() != RegionStartBB) - OutsideUsers.insert(&UsrI); + OutsideUses.push_back(&U); } - if (OutsideUsers.empty()) + if (OutsideUses.empty()) continue; HasBroadcastValues = true; @@ -3817,8 +3918,8 @@ RegionBarrierBB->getTerminator()); // Emit a load instruction and replace uses of the output value. - for (Instruction *UsrI : OutsideUsers) - UsrI->replaceUsesOfWith(&I, LoadI); + for (Use *U : OutsideUses) + A.changeUseAfterManifest(*U, *LoadI); } auto &OMPInfoCache = static_cast(A.getInfoCache()); @@ -4045,16 +4146,11 @@ assert(omp::isKernel(*Kernel) && "Expected kernel function!"); // Check if the kernel is already in SPMD mode, if so, return success. - GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable( - (Kernel->getName() + "_exec_mode").str()); - assert(ExecMode && "Kernel without exec mode?"); - assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!"); - - // Set the global exec mode flag to indicate SPMD-Generic mode. - assert(isa(ExecMode->getInitializer()) && - "ExecMode is not an integer!"); - const int8_t ExecModeVal = - cast(ExecMode->getInitializer())->getSExtValue(); + ConstantStruct *ExistingKernelEnvC = + KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB); + auto *ExecModeC = + KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC); + const int8_t ExecModeVal = ExecModeC->getSExtValue(); if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC) return true; @@ -4072,27 +4168,8 @@ // kernel is executed in. assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && "Initially non-SPMD kernel has SPMD exec mode!"); - ExecMode->setInitializer( - ConstantInt::get(ExecMode->getInitializer()->getType(), - ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); - - // Next rewrite the init and deinit calls to indicate we use SPMD-mode now. - const int InitModeArgNo = 1; - const int DeinitModeArgNo = 1; - const int InitUseStateMachineArgNo = 2; - - auto &Ctx = getAnchorValue().getContext(); - A.changeUseAfterManifest( - KernelInitCB->getArgOperandUse(InitModeArgNo), - *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), - OMP_TGT_EXEC_MODE_SPMD)); - A.changeUseAfterManifest( - KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), - *ConstantInt::getBool(Ctx, false)); - A.changeUseAfterManifest( - KernelDeinitCB->getArgOperandUse(DeinitModeArgNo), - *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), - OMP_TGT_EXEC_MODE_SPMD)); + setExecModeOfKernelEnvironment(ConstantInt::get( + ExecModeC->getType(), ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); ++NumOpenMPTargetRegionKernelsSPMD; @@ -4119,30 +4196,29 @@ OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel})) return ChangeStatus::UNCHANGED; - const int InitModeArgNo = 1; - const int InitUseStateMachineArgNo = 2; + ConstantStruct *ExistingKernelEnvC = + KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB); // Check if the current configuration is non-SPMD and generic state machine. // If we already have SPMD mode or a custom state machine we do not need to // go any further. If it is anything but a constant something is weird and // we give up. - ConstantInt *UseStateMachine = dyn_cast( - KernelInitCB->getArgOperand(InitUseStateMachineArgNo)); - ConstantInt *Mode = - dyn_cast(KernelInitCB->getArgOperand(InitModeArgNo)); + ConstantInt *UseStateMachineC = + KernelInfo::getUseGenericStateMachineFromKernelEnvironment( + ExistingKernelEnvC); + ConstantInt *ModeC = + KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC); // If we are stuck with generic mode, try to create a custom device (=GPU) // state machine which is specialized for the parallel regions that are // reachable by the kernel. - if (!UseStateMachine || UseStateMachine->isZero() || !Mode || - (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) + if (UseStateMachineC->isZero() || + (ModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) return ChangeStatus::UNCHANGED; // If not SPMD mode, indicate we use a custom state machine now. - auto &Ctx = getAnchorValue().getContext(); - auto *FalseVal = ConstantInt::getBool(Ctx, false); - A.changeUseAfterManifest( - KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal); + setUseGenericStateMachineOfKernelEnvironment( + ConstantInt::get(UseStateMachineC->getType(), false)); // If we don't actually need a state machine we are done here. This can // happen if there simply are no parallel regions. In the resulting kernel @@ -4221,6 +4297,7 @@ // UserCodeEntryBB: // user code // __kmpc_target_deinit(...) // + auto &Ctx = getAnchorValue().getContext(); Function *Kernel = getAssociatedFunction(); assert(Kernel && "Expected an associated function!"); @@ -4303,7 +4380,7 @@ StateMachineBeginBB->end()), DLoc)); - Value *Ident = KernelInitCB->getArgOperand(0); + Value *Ident = KernelInfo::getIdentFromKernelEnvironment(KernelEnvC); Value *GTid = KernelInitCB; FunctionCallee BarrierFn = @@ -4430,6 +4507,46 @@ ChangeStatus updateImpl(Attributor &A) override { KernelInfoState StateBefore = getState(); + // When we leave this function this RAII will make sure the member + // KernelEnvC is updated properly depending on the state. That member is + // used for simplification of values and needs to be up to date at all + // times. + struct UpdateKernelEnvCRAII { + AAKernelInfoFunction &AA; + + UpdateKernelEnvCRAII(AAKernelInfoFunction &AA) : AA(AA) {} + + ~UpdateKernelEnvCRAII() { + if (!AA.KernelEnvC) + return; + + ConstantStruct *ExistingKernelEnvC = + KernelInfo::getKernelEnvironementFromKernelInitCB(AA.KernelInitCB); + + if (!AA.isValidState()) { + AA.KernelEnvC = ExistingKernelEnvC; + return; + } + + if (!AA.ReachedKnownParallelRegions.isValidState()) + AA.setUseGenericStateMachineOfKernelEnvironment( + KernelInfo::getUseGenericStateMachineFromKernelEnvironment( + ExistingKernelEnvC)); + + if (!AA.SPMDCompatibilityTracker.isValidState()) + AA.setExecModeOfKernelEnvironment( + KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC)); + + ConstantInt *MayUseNestedParallelismC = + KernelInfo::getMayUseNestedParallelismFromKernelEnvironment( + AA.KernelEnvC); + ConstantInt *NewMayUseNestedParallelismC = ConstantInt::get( + MayUseNestedParallelismC->getType(), AA.NestedParallelism); + AA.setMayUseNestedParallelismOfKernelEnvironment( + NewMayUseNestedParallelismC); + } + } RAII(*this); + // Callback to check a read/write instruction. auto CheckRWInst = [&](Instruction &I) { // We handle calls later. diff --git a/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll b/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll --- a/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll +++ b/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll @@ -7,13 +7,13 @@ @_ZN4ompx5state9TeamStateE = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef define weak_odr amdgpu_kernel void @__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() { - %1 = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false) + %1 = tail call i32 @__kmpc_target_init(ptr null) ret void } -define internal i32 @__kmpc_target_init(ptr %0, i8 %1, i1 %2) { +define internal i32 @__kmpc_target_init(ptr %0) { store <2 x i32> zeroinitializer, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16 - %4 = call i1 @__kmpc_kernel_parallel() + %2 = call i1 @__kmpc_kernel_parallel() ret i32 0 } @@ -29,14 +29,14 @@ ; CHECK: @[[_ZN4OMPX5STATE9TEAMSTATEE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef ;. ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() { -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null) ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init -; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-SAME: (ptr [[TMP0:%.*]]) { ; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16 -; CHECK-NEXT: [[TMP4:%.*]] = call i1 @__kmpc_kernel_parallel() +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @__kmpc_kernel_parallel() ; CHECK-NEXT: ret i32 0 ; ; diff --git a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll --- a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll @@ -310,7 +310,7 @@ define weak_odr void @t3() { ; CHECK-LABEL: define {{[^@]+}}@t3() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr noundef align 4294967296 null, i8 noundef 0, i1 noundef false, i1 noundef false) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr noundef align 4294967296 null) ; CHECK-NEXT: br label [[USER_CODE_ENTRY:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: br label [[FOR_COND:%.*]] @@ -321,7 +321,7 @@ ; CHECK-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(ptr null, i8 0, i1 false, i1 false) + %0 = call i32 @__kmpc_target_init(ptr null) br label %user_code.entry user_code.entry: ; preds = %entry @@ -335,7 +335,7 @@ ret void } -declare i32 @__kmpc_target_init(ptr, i8, i1, i1) +declare i32 @__kmpc_target_init(ptr) define %S.2 @t3.helper() { ; CHECK-LABEL: define {{[^@]+}}@t3.helper() { @@ -380,22 +380,22 @@ ; TUNIT-NEXT: store i32 [[X]], ptr [[TMP]], align 4 ; TUNIT-NEXT: br label [[BB16:%.*]] ; TUNIT: bb16: -; TUNIT-NEXT: [[TRUETMP18:%.*]] = icmp eq i32 [[X]], 0 -; TUNIT-NEXT: br i1 [[TRUETMP18]], label [[BB35:%.*]], label [[BB19:%.*]] +; TUNIT-NEXT: [[TMP18:%.*]] = icmp eq i32 [[X]], 0 +; TUNIT-NEXT: br i1 [[TMP18]], label [[BB35:%.*]], label [[BB19:%.*]] ; TUNIT: bb19: -; TUNIT-NEXT: [[TRUETMP21:%.*]] = load float, ptr [[TMP]], align 4 -; TUNIT-NEXT: [[TRUETMP22:%.*]] = fadd fast float [[TRUETMP21]], 0.000000e+00 +; TUNIT-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP]], align 4 +; TUNIT-NEXT: [[TMP22:%.*]] = fadd fast float [[TMP21]], 0.000000e+00 ; TUNIT-NEXT: br label [[BB23:%.*]] ; TUNIT: bb23: -; TUNIT-NEXT: [[TRUETMP24:%.*]] = phi <2 x float> [ undef, [[BB19]] ], [ [[TRUETMP26:%.*]], [[BB34:%.*]] ] +; TUNIT-NEXT: [[TMP24:%.*]] = phi <2 x float> [ undef, [[BB19]] ], [ [[TMP26:%.*]], [[BB34:%.*]] ] ; TUNIT-NEXT: br label [[BB25:%.*]] ; TUNIT: bb25: -; TUNIT-NEXT: [[TRUETMP26]] = phi <2 x float> [ [[TRUETMP30:%.*]], [[BB28:%.*]] ], [ [[TRUETMP24]], [[BB23]] ] -; TUNIT-NEXT: [[TRUETMP27:%.*]] = icmp ult i32 undef, 8 -; TUNIT-NEXT: br i1 [[TRUETMP27]], label [[BB28]], label [[BB34]] +; TUNIT-NEXT: [[TMP26]] = phi <2 x float> [ [[TMP30:%.*]], [[BB28:%.*]] ], [ [[TMP24]], [[BB23]] ] +; TUNIT-NEXT: [[TMP27:%.*]] = icmp ult i32 undef, 8 +; TUNIT-NEXT: br i1 [[TMP27]], label [[BB28]], label [[BB34]] ; TUNIT: bb28: -; TUNIT-NEXT: [[TRUETMP29:%.*]] = insertelement <2 x float> [[TRUETMP26]], float undef, i32 0 -; TUNIT-NEXT: [[TRUETMP30]] = insertelement <2 x float> [[TRUETMP29]], float [[TRUETMP22]], i32 1 +; TUNIT-NEXT: [[TMP29:%.*]] = insertelement <2 x float> [[TMP26]], float undef, i32 0 +; TUNIT-NEXT: [[TMP30]] = insertelement <2 x float> [[TMP29]], float [[TMP22]], i32 1 ; TUNIT-NEXT: br label [[BB25]] ; TUNIT: bb34: ; TUNIT-NEXT: br label [[BB23]] @@ -411,22 +411,22 @@ ; CGSCC-NEXT: store i32 [[X]], ptr [[TMP]], align 4 ; CGSCC-NEXT: br label [[BB16:%.*]] ; CGSCC: bb16: -; CGSCC-NEXT: [[TRUETMP18:%.*]] = icmp eq i32 [[X]], 0 -; CGSCC-NEXT: br i1 [[TRUETMP18]], label [[BB35:%.*]], label [[BB19:%.*]] +; CGSCC-NEXT: [[TMP18:%.*]] = icmp eq i32 [[X]], 0 +; CGSCC-NEXT: br i1 [[TMP18]], label [[BB35:%.*]], label [[BB19:%.*]] ; CGSCC: bb19: -; CGSCC-NEXT: [[TRUETMP21:%.*]] = load float, ptr [[TMP]], align 4 -; CGSCC-NEXT: [[TRUETMP22:%.*]] = fadd fast float [[TRUETMP21]], 0.000000e+00 +; CGSCC-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP]], align 4 +; CGSCC-NEXT: [[TMP22:%.*]] = fadd fast float [[TMP21]], 0.000000e+00 ; CGSCC-NEXT: br label [[BB23:%.*]] ; CGSCC: bb23: -; CGSCC-NEXT: [[TRUETMP24:%.*]] = phi <2 x float> [ undef, [[BB19]] ], [ [[TRUETMP26:%.*]], [[BB34:%.*]] ] +; CGSCC-NEXT: [[TMP24:%.*]] = phi <2 x float> [ undef, [[BB19]] ], [ [[TMP26:%.*]], [[BB34:%.*]] ] ; CGSCC-NEXT: br label [[BB25:%.*]] ; CGSCC: bb25: -; CGSCC-NEXT: [[TRUETMP26]] = phi <2 x float> [ [[TRUETMP30:%.*]], [[BB28:%.*]] ], [ [[TRUETMP24]], [[BB23]] ] -; CGSCC-NEXT: [[TRUETMP27:%.*]] = icmp ult i32 undef, 8 -; CGSCC-NEXT: br i1 [[TRUETMP27]], label [[BB28]], label [[BB34]] +; CGSCC-NEXT: [[TMP26]] = phi <2 x float> [ [[TMP30:%.*]], [[BB28:%.*]] ], [ [[TMP24]], [[BB23]] ] +; CGSCC-NEXT: [[TMP27:%.*]] = icmp ult i32 undef, 8 +; CGSCC-NEXT: br i1 [[TMP27]], label [[BB28]], label [[BB34]] ; CGSCC: bb28: -; CGSCC-NEXT: [[TRUETMP29:%.*]] = insertelement <2 x float> [[TRUETMP26]], float undef, i32 0 -; CGSCC-NEXT: [[TRUETMP30]] = insertelement <2 x float> [[TRUETMP29]], float [[TRUETMP22]], i32 1 +; CGSCC-NEXT: [[TMP29:%.*]] = insertelement <2 x float> [[TMP26]], float undef, i32 0 +; CGSCC-NEXT: [[TMP30]] = insertelement <2 x float> [[TMP29]], float [[TMP22]], i32 1 ; CGSCC-NEXT: br label [[BB25]] ; CGSCC: bb34: ; CGSCC-NEXT: br label [[BB23]] diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -736,9 +736,9 @@ declare i64 @__kmpc_shuffle_int64(i64, i16, i16); -declare void @__kmpc_target_deinit(ptr, i8); +declare void @__kmpc_target_deinit(); -declare i32 @__kmpc_target_init(ptr, i8, i1); +declare i32 @__kmpc_target_init(ptr); declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32); @@ -1389,10 +1389,10 @@ ; CHECK: declare i64 @__kmpc_shuffle_int64(i64, i16, i16) ; CHECK-NOT: Function Attrs -; CHECK: declare void @__kmpc_target_deinit(ptr, i8) +; CHECK: declare void @__kmpc_target_deinit() ; CHECK-NOT: Function Attrs -; CHECK: declare i32 @__kmpc_target_init(ptr, i8, i1) +; CHECK: declare i32 @__kmpc_target_init(ptr) ; CHECK-NOT: Function Attrs ; CHECK: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32) @@ -2037,10 +2037,10 @@ ; OPTIMISTIC: declare i64 @__kmpc_shuffle_int64(i64, i16, i16) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare void @__kmpc_target_deinit(ptr, i8) +; OPTIMISTIC: declare void @__kmpc_target_deinit() ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare i32 @__kmpc_target_init(ptr, i8, i1) +; OPTIMISTIC: declare i32 @__kmpc_target_init(ptr) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32) @@ -2698,10 +2698,10 @@ ; EXT: declare i64 @__kmpc_shuffle_int64(i64, i16 signext, i16 signext) ; EXT-NOT: Function Attrs -; EXT: declare void @__kmpc_target_deinit(ptr, i8 signext) +; EXT: declare void @__kmpc_target_deinit() ; EXT-NOT: Function Attrs -; EXT: declare signext i32 @__kmpc_target_init(ptr, i8 signext, i1 signext) +; EXT: declare signext i32 @__kmpc_target_init(ptr) ; EXT-NOT: Function Attrs ; EXT: declare void @__tgt_interop_destroy(ptr, i32 signext, ptr, i32 signext, i32 signext, ptr, i32 signext) diff --git a/llvm/test/Transforms/OpenMP/always_inline_device.ll b/llvm/test/Transforms/OpenMP/always_inline_device.ll --- a/llvm/test/Transforms/OpenMP/always_inline_device.ll +++ b/llvm/test/Transforms/OpenMP/always_inline_device.ll @@ -1,19 +1,27 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals ; RUN: opt < %s -S -passes=openmp-opt -openmp-opt-inline-device | FileCheck %s %struct.ident_t = type { i32, i32, i32, i32, ptr } +%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } +%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 -@__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode = weak constant i8 1 -@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode], section "llvm.metadata" @G = external global i8 -; Function Attrs: norecurse nounwind +@kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } + +; Function Attrs: convergent norecurse nounwind +;. +; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8 +; CHECK: @[[KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null } +;. define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 { ; CHECK: Function Attrs: norecurse nounwind ; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_environment) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -24,13 +32,13 @@ ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: store i8 1, ptr @G, align 1 -; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +; CHECK-NEXT: call void @__kmpc_target_deinit() ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -42,7 +50,7 @@ %isSPMD = call i8 @__kmpc_is_spmd_exec_mode() store i8 %isSPMD, ptr @G call void @bar() #2 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -51,9 +59,9 @@ declare i8 @__kmpc_is_spmd_exec_mode() -declare i32 @__kmpc_target_init(ptr, i8, i1) +declare i32 @__kmpc_target_init(ptr) -declare void @__kmpc_target_deinit(ptr, i8) +declare void @__kmpc_target_deinit() ; Function Attrs: convergent nounwind define hidden void @bar() #1 { @@ -83,3 +91,17 @@ !5 = !{i32 7, !"PIC Level", i32 2} !6 = !{i32 7, !"frame-pointer", i32 2} !7 = !{!"clang version 14.0.0"} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx32,+sm_70" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline convergent nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx32,+sm_70" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind } +;. +; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0} +; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1} +; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} +; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; CHECK: [[META7:![0-9]+]] = !{!"clang version 14.0.0"} +;. diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU ; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU-DISABLED @@ -120,28 +120,30 @@ ;; } %struct.ident_t = type { i32, i32, i32, i32, ptr } +%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } +%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 -@__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode = weak constant i8 1 @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 @G = external global i32, align 4 @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8 -@llvm.compiler.used = appending global [8 x ptr] [ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata" + +@__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -149,7 +151,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -157,7 +159,7 @@ } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(ptr, i8, i1) { +define weak i32 @__kmpc_target_init(ptr) { ret i32 0 } @@ -193,14 +195,14 @@ declare i32 @__kmpc_global_thread_num(ptr) #3 -declare void @__kmpc_target_deinit(ptr, i8) +declare void @__kmpc_target_deinit() define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -208,7 +210,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -293,7 +295,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -301,7 +303,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -370,7 +372,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -378,7 +380,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -456,7 +458,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -464,7 +466,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -540,7 +542,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -548,7 +550,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -627,7 +629,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -635,7 +637,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -682,7 +684,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -690,7 +692,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -832,26 +834,101 @@ !16 = !{i32 1, !"wchar_size", i32 4} !17 = !{i32 7, !"openmp", i32 50} !18 = !{i32 7, !"openmp-device", i32 50} +;. +; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 +; AMDGPU: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU: @[[__OMP_OUTLINED__2_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; AMDGPU: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; AMDGPU: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; AMDGPU: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; AMDGPU: @[[__OMP_OUTLINED__8_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; AMDGPU: @[[__OMP_OUTLINED__10_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; AMDGPU: @[[__OMP_OUTLINED__11_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; AMDGPU: @[[__OMP_OUTLINED__13_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; AMDGPU: @[[__OMP_OUTLINED__14_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +;. +; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 +; NVPTX: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX: @[[__OMP_OUTLINED__2_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; NVPTX: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; NVPTX: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; NVPTX: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; NVPTX: @[[__OMP_OUTLINED__8_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; NVPTX: @[[__OMP_OUTLINED__10_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; NVPTX: @[[__OMP_OUTLINED__11_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; NVPTX: @[[__OMP_OUTLINED__13_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +; NVPTX: @[[__OMP_OUTLINED__14_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef +;. +; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU-DISABLED: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU-DISABLED: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 +; AMDGPU-DISABLED: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +;. +; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX-DISABLED: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX-DISABLED: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 +; NVPTX-DISABLED: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +;. ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 ; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void ; ; ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; AMDGPU-SAME: (ptr [[TMP0:%.*]]) { ; AMDGPU-NEXT: ret i32 0 ; ; @@ -870,7 +947,7 @@ ; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] @@ -879,7 +956,7 @@ ; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: br label [[OMP_IF_END]] ; AMDGPU: omp_if.end: -; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -907,7 +984,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -953,7 +1030,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1026,7 +1103,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1078,7 +1155,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1168,7 +1245,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1216,7 +1293,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1288,7 +1365,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1334,7 +1411,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1406,7 +1483,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1452,7 +1529,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1524,7 +1601,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1564,7 +1641,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1630,7 +1707,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1666,7 +1743,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1784,20 +1861,20 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void ; ; ; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; NVPTX-SAME: (ptr [[TMP0:%.*]]) { ; NVPTX-NEXT: ret i32 0 ; ; @@ -1816,7 +1893,7 @@ ; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; NVPTX-SAME: () #[[ATTR1:[0-9]+]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] @@ -1825,7 +1902,7 @@ ; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: br label [[OMP_IF_END]] ; NVPTX: omp_if.end: -; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -1853,7 +1930,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -1898,7 +1975,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1971,7 +2048,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2022,7 +2099,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2112,7 +2189,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2159,7 +2236,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2231,7 +2308,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2276,7 +2353,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2348,7 +2425,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2393,7 +2470,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2465,7 +2542,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2504,7 +2581,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2570,7 +2647,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2605,7 +2682,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-NEXT: call void @__kmpc_target_deinit() ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2723,20 +2800,20 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]]) { ; AMDGPU-DISABLED-NEXT: ret i32 0 ; ; @@ -2755,7 +2832,7 @@ ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; AMDGPU-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] @@ -2764,7 +2841,7 @@ ; AMDGPU-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: br label [[OMP_IF_END]] ; AMDGPU-DISABLED: omp_if.end: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -2791,13 +2868,13 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -2869,13 +2946,13 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -2964,13 +3041,13 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3041,13 +3118,13 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3118,13 +3195,13 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3195,13 +3272,13 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3266,13 +3343,13 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit() ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3390,20 +3467,20 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init -; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]]) { ; NVPTX-DISABLED-NEXT: ret i32 0 ; ; @@ -3422,7 +3499,7 @@ ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized ; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; NVPTX-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] @@ -3431,7 +3508,7 @@ ; NVPTX-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: br label [[OMP_IF_END]] ; NVPTX-DISABLED: omp_if.end: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3458,13 +3535,13 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3536,13 +3613,13 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3631,13 +3708,13 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3708,13 +3785,13 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3785,13 +3862,13 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3862,13 +3939,13 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3933,13 +4010,13 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit() ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -4050,3 +4127,136 @@ ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; +;. +; AMDGPU: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU: attributes #[[ATTR3]] = { nounwind } +; AMDGPU: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } +; AMDGPU: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU: attributes #[[ATTR9]] = { convergent nounwind } +; AMDGPU: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } +; AMDGPU: attributes #[[ATTR11]] = { convergent } +;. +; NVPTX: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX: attributes #[[ATTR3]] = { nounwind } +; NVPTX: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } +; NVPTX: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX: attributes #[[ATTR9]] = { convergent nounwind } +; NVPTX: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } +; NVPTX: attributes #[[ATTR11]] = { convergent } +;. +; AMDGPU-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU-DISABLED: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU-DISABLED: attributes #[[ATTR3]] = { nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } +; AMDGPU-DISABLED: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU-DISABLED: attributes #[[ATTR9]] = { convergent nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } +; AMDGPU-DISABLED: attributes #[[ATTR11]] = { convergent } +;. +; NVPTX-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX-DISABLED: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX-DISABLED: attributes #[[ATTR3]] = { nounwind } +; NVPTX-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } +; NVPTX-DISABLED: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX-DISABLED: attributes #[[ATTR9]] = { convergent nounwind } +; NVPTX-DISABLED: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } +; NVPTX-DISABLED: attributes #[[ATTR11]] = { convergent } +;. +; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} +; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} +; AMDGPU: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} +; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} +; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} +; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} +; AMDGPU: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} +; AMDGPU: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} +; AMDGPU: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} +; AMDGPU: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} +; AMDGPU: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} +; AMDGPU: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} +; AMDGPU: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} +; AMDGPU: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} +; AMDGPU: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} +; AMDGPU: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} +; AMDGPU: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +;. +; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} +; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} +; NVPTX: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} +; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} +; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} +; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} +; NVPTX: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} +; NVPTX: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} +; NVPTX: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} +; NVPTX: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} +; NVPTX: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} +; NVPTX: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} +; NVPTX: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} +; NVPTX: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} +; NVPTX: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} +; NVPTX: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} +; NVPTX: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +;. +; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} +; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} +; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} +; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} +; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} +; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} +; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} +; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} +; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} +; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU-DISABLED: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +;. +; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} +; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} +; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} +; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} +; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} +; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} +; NVPTX-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} +; NVPTX-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} +; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} +; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} +; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} +; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} +; NVPTX-DISABLED: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} +; NVPTX-DISABLED: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} +; NVPTX-DISABLED: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} +; NVPTX-DISABLED: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} +; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX-DISABLED: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX-DISABLED: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +;. diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll --- a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll @@ -1,10 +1,10 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs -; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU -; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX -; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU -; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU -; RUN: opt --mtriple=nvptx64-- -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX -; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs +; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU1 +; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX1 +; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU2 +; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU3 +; RUN: opt --mtriple=nvptx64-- -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX2 +; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX3 ;; void p0(void); ;; void p1(void); @@ -122,28 +122,29 @@ ;; } %struct.ident_t = type { i32, i32, i32, i32, ptr } +%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } +%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 -@__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode = weak constant i8 1 -@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode = weak constant i8 1 @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8 @G = external global i32, align 4 @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8 -@llvm.compiler.used = appending global [8 x ptr] [ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata" +@__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } +@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null } define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -151,7 +152,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -159,7 +160,7 @@ } ; Make it a declaration so we will *not* apply custom state machine rewriting and wait for LTO. -declare i32 @__kmpc_target_init(ptr, i8, i1); +declare i32 @__kmpc_target_init(ptr); define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 { entry: @@ -193,14 +194,14 @@ declare i32 @__kmpc_global_thread_num(ptr) #3 -declare void @__kmpc_target_deinit(ptr, i8) +declare void @__kmpc_target_deinit() define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -208,7 +209,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -293,7 +294,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -301,7 +302,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -370,7 +371,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -378,7 +379,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -456,7 +457,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -464,7 +465,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -540,7 +541,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -548,7 +549,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -627,7 +628,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -635,7 +636,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -682,7 +683,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, ptr %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -690,7 +691,7 @@ %1 = call i32 @__kmpc_global_thread_num(ptr @1) store i32 %1, ptr %.threadid_temp., align 4 call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3 - call void @__kmpc_target_deinit(ptr @1, i8 1) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry @@ -832,1326 +833,4258 @@ !16 = !{i32 1, !"wchar_size", i32 4} !17 = !{i32 7, !"openmp", i32 50} !18 = !{i32 7, !"openmp-device", i32 50} -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 -; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) -; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; AMDGPU-NEXT: ret void -; AMDGPU: worker.exit: -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] -; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline nounwind -; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized -; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; AMDGPU: omp_if.then: -; AMDGPU-NEXT: store i32 0, ptr @G, align 4 -; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; AMDGPU-NEXT: br label [[OMP_IF_END]] -; AMDGPU: omp_if.end: -; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline nounwind -; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here -; AMDGPU-SAME: () #[[ATTR1]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; AMDGPU: omp_if.then: -; AMDGPU-NEXT: store i32 0, ptr @G, align 4 -; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; AMDGPU-NEXT: br label [[OMP_IF_END]] -; AMDGPU: omp_if.end: -; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 -; AMDGPU-SAME: () #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; AMDGPU-NEXT: ret void -; AMDGPU: worker.exit: -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]] -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p0() #[[ATTR11:[0-9]+]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p1() #[[ATTR11]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 -; AMDGPU-SAME: () #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; AMDGPU-NEXT: ret void -; AMDGPU: worker.exit: -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]] -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] -; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: noinline nounwind -; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; AMDGPU-SAME: () #[[ATTR6:[0-9]+]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline nounwind -; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before -; AMDGPU-SAME: () #[[ATTR1]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p1() #[[ATTR11]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: noinline nounwind -; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; AMDGPU-SAME: () #[[ATTR6]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline nounwind -; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after -; AMDGPU-SAME: () #[[ATTR1]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 -; AMDGPU-SAME: () #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; AMDGPU-NEXT: ret void -; AMDGPU: worker.exit: -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p0() #[[ATTR11]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p1() #[[ATTR11]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 -; AMDGPU-SAME: () #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; AMDGPU-NEXT: ret void -; AMDGPU: worker.exit: -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]] -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p0() #[[ATTR11]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p1() #[[ATTR11]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 -; AMDGPU-SAME: () #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; AMDGPU-NEXT: ret void -; AMDGPU: worker.exit: -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]] -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p0() #[[ATTR11]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p1() #[[ATTR11]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 -; AMDGPU-SAME: () #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; AMDGPU-NEXT: ret void -; AMDGPU: worker.exit: -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: noinline nounwind -; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized -; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; AMDGPU: if.then: -; AMDGPU-NEXT: br label [[RETURN:%.*]] -; AMDGPU: if.end: -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]] -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]] -; AMDGPU-NEXT: br label [[RETURN]] -; AMDGPU: return: -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline nounwind -; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after -; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; AMDGPU: if.then: -; AMDGPU-NEXT: br label [[RETURN:%.*]] -; AMDGPU: if.end: -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]] -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]] -; AMDGPU-NEXT: br label [[RETURN]] -; AMDGPU: return: -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 -; AMDGPU-SAME: () #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; AMDGPU-NEXT: ret void -; AMDGPU: worker.exit: -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR9]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline nounwind -; AMDGPU-LABEL: define {{[^@]+}}@weak_callee_empty -; AMDGPU-SAME: () #[[ATTR1]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p0() #[[ATTR11]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p0() #[[ATTR11]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: noinline nounwind -; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized -; AMDGPU-SAME: () #[[ATTR6]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline nounwind -; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after -; AMDGPU-SAME: () #[[ATTR1]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19 -; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p0() #[[ATTR11]] -; AMDGPU-NEXT: ret void -; -; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind -; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper -; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 -; NVPTX-SAME: () #[[ATTR0:[0-9]+]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) -; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; NVPTX-NEXT: ret void -; NVPTX: worker.exit: -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] -; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline nounwind -; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized -; NVPTX-SAME: () #[[ATTR1:[0-9]+]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]] -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; NVPTX: omp_if.then: -; NVPTX-NEXT: store i32 0, ptr @G, align 4 -; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; NVPTX-NEXT: br label [[OMP_IF_END]] -; NVPTX: omp_if.end: -; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline nounwind -; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here -; NVPTX-SAME: () #[[ATTR1]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; NVPTX: omp_if.then: -; NVPTX-NEXT: store i32 0, ptr @G, align 4 -; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) -; NVPTX-NEXT: br label [[OMP_IF_END]] -; NVPTX: omp_if.end: -; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 -; NVPTX-SAME: () #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; NVPTX-NEXT: ret void -; NVPTX: worker.exit: -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]] -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p0() #[[ATTR11:[0-9]+]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p1() #[[ATTR11]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 -; NVPTX-SAME: () #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; NVPTX-NEXT: ret void -; NVPTX: worker.exit: -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]] -; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] -; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: noinline nounwind -; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; NVPTX-SAME: () #[[ATTR6:[0-9]+]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline nounwind -; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before -; NVPTX-SAME: () #[[ATTR1]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p1() #[[ATTR11]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: noinline nounwind -; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; NVPTX-SAME: () #[[ATTR6]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline nounwind -; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after -; NVPTX-SAME: () #[[ATTR1]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 -; NVPTX-SAME: () #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; NVPTX-NEXT: ret void -; NVPTX: worker.exit: -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p0() #[[ATTR11]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p1() #[[ATTR11]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 -; NVPTX-SAME: () #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; NVPTX-NEXT: ret void -; NVPTX: worker.exit: -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]] -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p0() #[[ATTR11]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p1() #[[ATTR11]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 -; NVPTX-SAME: () #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; NVPTX-NEXT: ret void -; NVPTX: worker.exit: -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]] -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p0() #[[ATTR11]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p1() #[[ATTR11]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 -; NVPTX-SAME: () #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; NVPTX-NEXT: ret void -; NVPTX: worker.exit: -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__15 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] -; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: noinline nounwind -; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized -; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; NVPTX: if.then: -; NVPTX-NEXT: br label [[RETURN:%.*]] -; NVPTX: if.end: -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]] -; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]] -; NVPTX-NEXT: br label [[RETURN]] -; NVPTX: return: -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline nounwind -; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after -; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; NVPTX: if.then: -; NVPTX-NEXT: br label [[RETURN:%.*]] -; NVPTX: if.end: -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]] -; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]] -; NVPTX-NEXT: br label [[RETURN]] -; NVPTX: return: -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 -; NVPTX-SAME: () #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) -; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) -; NVPTX-NEXT: ret void -; NVPTX: worker.exit: -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR9]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline nounwind -; NVPTX-LABEL: define {{[^@]+}}@weak_callee_empty -; NVPTX-SAME: () #[[ATTR1]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p0() #[[ATTR11]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p0() #[[ATTR11]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: noinline nounwind -; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized -; NVPTX-SAME: () #[[ATTR6]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline nounwind -; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after -; NVPTX-SAME: () #[[ATTR1]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p0() #[[ATTR11]] -; NVPTX-NEXT: ret void -; -; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind -; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper -; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: ret void -; +;. +; AMDGPU1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; AMDGPU1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU1: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU1: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 +; AMDGPU1: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +;. +; NVPTX1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; NVPTX1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX1: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX1: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 +; NVPTX1: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +;. +; AMDGPU2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; AMDGPU2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU2: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU2: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 +; AMDGPU2: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +;. +; AMDGPU3: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; AMDGPU3: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU3: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU3: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 +; AMDGPU3: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +;. +; NVPTX2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; NVPTX2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX2: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX2: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 +; NVPTX2: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +;. +; NVPTX3: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; NVPTX3: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX3: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX3: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4 +; NVPTX3: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8 +; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null } +; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null } +;. +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; AMDGPU1-SAME: () #[[ATTR0:[0-9]+]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) +; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU1: user_code.entry: +; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; AMDGPU1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__kmpc_target_deinit() +; AMDGPU1-NEXT: ret void +; AMDGPU1: worker.exit: +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__ +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] +; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; AMDGPU1-SAME: () #[[ATTR1:[0-9]+]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; AMDGPU1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; AMDGPU1: omp_if.then: +; AMDGPU1-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU1-NEXT: br label [[OMP_IF_END]] +; AMDGPU1: omp_if.end: +; AMDGPU1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; AMDGPU1-SAME: () #[[ATTR1]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; AMDGPU1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; AMDGPU1: omp_if.then: +; AMDGPU1-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU1-NEXT: br label [[OMP_IF_END]] +; AMDGPU1: omp_if.end: +; AMDGPU1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; AMDGPU1-SAME: () #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) +; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU1: user_code.entry: +; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__kmpc_target_deinit() +; AMDGPU1-NEXT: ret void +; AMDGPU1: worker.exit: +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__1 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @p0() #[[ATTR11:[0-9]+]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; AMDGPU1-SAME: () #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) +; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU1: user_code.entry: +; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__kmpc_target_deinit() +; AMDGPU1-NEXT: ret void +; AMDGPU1: worker.exit: +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__4 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] +; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: noinline nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; AMDGPU1-SAME: () #[[ATTR6:[0-9]+]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; AMDGPU1-SAME: () #[[ATTR1]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: noinline nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; AMDGPU1-SAME: () #[[ATTR6]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; AMDGPU1-SAME: () #[[ATTR1]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; AMDGPU1-SAME: () #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) +; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU1: user_code.entry: +; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__kmpc_target_deinit() +; AMDGPU1-NEXT: ret void +; AMDGPU1: worker.exit: +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__6 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; AMDGPU1-SAME: () #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) +; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU1: user_code.entry: +; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__kmpc_target_deinit() +; AMDGPU1-NEXT: ret void +; AMDGPU1: worker.exit: +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__9 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; AMDGPU1-SAME: () #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) +; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU1: user_code.entry: +; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__kmpc_target_deinit() +; AMDGPU1-NEXT: ret void +; AMDGPU1: worker.exit: +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__12 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; AMDGPU1-SAME: () #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) +; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU1: user_code.entry: +; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__kmpc_target_deinit() +; AMDGPU1-NEXT: ret void +; AMDGPU1: worker.exit: +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__15 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] +; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: noinline nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; AMDGPU1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; AMDGPU1: if.then: +; AMDGPU1-NEXT: br label [[RETURN:%.*]] +; AMDGPU1: if.end: +; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]] +; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]] +; AMDGPU1-NEXT: br label [[RETURN]] +; AMDGPU1: return: +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; AMDGPU1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; AMDGPU1: if.then: +; AMDGPU1-NEXT: br label [[RETURN:%.*]] +; AMDGPU1: if.end: +; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]] +; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]] +; AMDGPU1-NEXT: br label [[RETURN]] +; AMDGPU1: return: +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; AMDGPU1-SAME: () #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) +; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU1: user_code.entry: +; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__kmpc_target_deinit() +; AMDGPU1-NEXT: ret void +; AMDGPU1: worker.exit: +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__16 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @weak_callee_empty() #[[ATTR9]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@weak_callee_empty +; AMDGPU1-SAME: () #[[ATTR1]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: noinline nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; AMDGPU1-SAME: () #[[ATTR6]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; AMDGPU1-SAME: () #[[ATTR1]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19 +; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU1-NEXT: ret void +; +; +; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU1-NEXT: entry: +; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; NVPTX1-SAME: () #[[ATTR0:[0-9]+]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) +; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX1: user_code.entry: +; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; NVPTX1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__kmpc_target_deinit() +; NVPTX1-NEXT: ret void +; NVPTX1: worker.exit: +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__ +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] +; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline nounwind +; NVPTX1-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; NVPTX1-SAME: () #[[ATTR1:[0-9]+]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; NVPTX1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; NVPTX1: omp_if.then: +; NVPTX1-NEXT: store i32 0, ptr @G, align 4 +; NVPTX1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX1-NEXT: br label [[OMP_IF_END]] +; NVPTX1: omp_if.end: +; NVPTX1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline nounwind +; NVPTX1-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; NVPTX1-SAME: () #[[ATTR1]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; NVPTX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; NVPTX1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; NVPTX1: omp_if.then: +; NVPTX1-NEXT: store i32 0, ptr @G, align 4 +; NVPTX1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; NVPTX1-NEXT: br label [[OMP_IF_END]] +; NVPTX1: omp_if.end: +; NVPTX1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; NVPTX1-SAME: () #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) +; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX1: user_code.entry: +; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__kmpc_target_deinit() +; NVPTX1-NEXT: ret void +; NVPTX1: worker.exit: +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__1 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__2 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @p0() #[[ATTR11:[0-9]+]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__3 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @p1() #[[ATTR11]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; NVPTX1-SAME: () #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) +; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX1: user_code.entry: +; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__kmpc_target_deinit() +; NVPTX1-NEXT: ret void +; NVPTX1: worker.exit: +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__4 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] +; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: noinline nounwind +; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; NVPTX1-SAME: () #[[ATTR6:[0-9]+]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline nounwind +; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; NVPTX1-SAME: () #[[ATTR1]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__5 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @p1() #[[ATTR11]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: noinline nounwind +; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; NVPTX1-SAME: () #[[ATTR6]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline nounwind +; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; NVPTX1-SAME: () #[[ATTR1]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; NVPTX1-SAME: () #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) +; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX1: user_code.entry: +; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__kmpc_target_deinit() +; NVPTX1-NEXT: ret void +; NVPTX1: worker.exit: +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__6 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__7 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @p0() #[[ATTR11]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__8 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @p1() #[[ATTR11]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; NVPTX1-SAME: () #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) +; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX1: user_code.entry: +; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__kmpc_target_deinit() +; NVPTX1-NEXT: ret void +; NVPTX1: worker.exit: +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__9 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__10 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @p0() #[[ATTR11]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__11 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @p1() #[[ATTR11]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; NVPTX1-SAME: () #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) +; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX1: user_code.entry: +; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__kmpc_target_deinit() +; NVPTX1-NEXT: ret void +; NVPTX1: worker.exit: +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__12 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__13 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @p0() #[[ATTR11]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__14 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @p1() #[[ATTR11]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; NVPTX1-SAME: () #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) +; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX1: user_code.entry: +; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__kmpc_target_deinit() +; NVPTX1-NEXT: ret void +; NVPTX1: worker.exit: +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__15 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] +; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: noinline nounwind +; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; NVPTX1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; NVPTX1: if.then: +; NVPTX1-NEXT: br label [[RETURN:%.*]] +; NVPTX1: if.end: +; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]] +; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]] +; NVPTX1-NEXT: br label [[RETURN]] +; NVPTX1: return: +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline nounwind +; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; NVPTX1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; NVPTX1: if.then: +; NVPTX1-NEXT: br label [[RETURN:%.*]] +; NVPTX1: if.end: +; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]] +; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]] +; NVPTX1-NEXT: br label [[RETURN]] +; NVPTX1: return: +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; NVPTX1-SAME: () #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) +; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX1: user_code.entry: +; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__kmpc_target_deinit() +; NVPTX1-NEXT: ret void +; NVPTX1: worker.exit: +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__16 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @weak_callee_empty() #[[ATTR9]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline nounwind +; NVPTX1-LABEL: define {{[^@]+}}@weak_callee_empty +; NVPTX1-SAME: () #[[ATTR1]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__17 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @p0() #[[ATTR11]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__18 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @p0() #[[ATTR11]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: noinline nounwind +; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; NVPTX1-SAME: () #[[ATTR6]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline nounwind +; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; NVPTX1-SAME: () #[[ATTR1]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__19 +; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @p0() #[[ATTR11]] +; NVPTX1-NEXT: ret void +; +; +; NVPTX1: Function Attrs: convergent noinline norecurse nounwind +; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX1-NEXT: entry: +; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX1-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; AMDGPU2-SAME: () #[[ATTR0:[0-9]+]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) +; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU2: user_code.entry: +; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; AMDGPU2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__kmpc_target_deinit() +; AMDGPU2-NEXT: ret void +; AMDGPU2: worker.exit: +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__ +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] +; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; AMDGPU2-SAME: () #[[ATTR1:[0-9]+]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; AMDGPU2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; AMDGPU2: omp_if.then: +; AMDGPU2-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU2-NEXT: br label [[OMP_IF_END]] +; AMDGPU2: omp_if.end: +; AMDGPU2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; AMDGPU2-SAME: () #[[ATTR1]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; AMDGPU2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; AMDGPU2: omp_if.then: +; AMDGPU2-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU2-NEXT: br label [[OMP_IF_END]] +; AMDGPU2: omp_if.end: +; AMDGPU2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; AMDGPU2-SAME: () #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) +; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU2: user_code.entry: +; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__kmpc_target_deinit() +; AMDGPU2-NEXT: ret void +; AMDGPU2: worker.exit: +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__1 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @p0() #[[ATTR11:[0-9]+]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; AMDGPU2-SAME: () #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) +; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU2: user_code.entry: +; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__kmpc_target_deinit() +; AMDGPU2-NEXT: ret void +; AMDGPU2: worker.exit: +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__4 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] +; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: noinline nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; AMDGPU2-SAME: () #[[ATTR6:[0-9]+]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; AMDGPU2-SAME: () #[[ATTR1]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__5 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: noinline nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; AMDGPU2-SAME: () #[[ATTR6]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; AMDGPU2-SAME: () #[[ATTR1]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; AMDGPU2-SAME: () #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) +; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU2: user_code.entry: +; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__kmpc_target_deinit() +; AMDGPU2-NEXT: ret void +; AMDGPU2: worker.exit: +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__6 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; AMDGPU2-SAME: () #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) +; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU2: user_code.entry: +; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__kmpc_target_deinit() +; AMDGPU2-NEXT: ret void +; AMDGPU2: worker.exit: +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__9 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; AMDGPU2-SAME: () #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) +; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU2: user_code.entry: +; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__kmpc_target_deinit() +; AMDGPU2-NEXT: ret void +; AMDGPU2: worker.exit: +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__12 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; AMDGPU2-SAME: () #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) +; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU2: user_code.entry: +; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__kmpc_target_deinit() +; AMDGPU2-NEXT: ret void +; AMDGPU2: worker.exit: +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__15 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] +; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: noinline nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; AMDGPU2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; AMDGPU2: if.then: +; AMDGPU2-NEXT: br label [[RETURN:%.*]] +; AMDGPU2: if.end: +; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]] +; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]] +; AMDGPU2-NEXT: br label [[RETURN]] +; AMDGPU2: return: +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; AMDGPU2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; AMDGPU2: if.then: +; AMDGPU2-NEXT: br label [[RETURN:%.*]] +; AMDGPU2: if.end: +; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]] +; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]] +; AMDGPU2-NEXT: br label [[RETURN]] +; AMDGPU2: return: +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; AMDGPU2-SAME: () #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) +; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU2: user_code.entry: +; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__kmpc_target_deinit() +; AMDGPU2-NEXT: ret void +; AMDGPU2: worker.exit: +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__16 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @weak_callee_empty() #[[ATTR9]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@weak_callee_empty +; AMDGPU2-SAME: () #[[ATTR1]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: noinline nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; AMDGPU2-SAME: () #[[ATTR6]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; AMDGPU2-SAME: () #[[ATTR1]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19 +; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU2-NEXT: entry: +; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU2-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; AMDGPU3-SAME: () #[[ATTR0:[0-9]+]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) +; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU3: user_code.entry: +; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; AMDGPU3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__kmpc_target_deinit() +; AMDGPU3-NEXT: ret void +; AMDGPU3: worker.exit: +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__ +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] +; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; AMDGPU3-SAME: () #[[ATTR1:[0-9]+]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; AMDGPU3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; AMDGPU3: omp_if.then: +; AMDGPU3-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU3-NEXT: br label [[OMP_IF_END]] +; AMDGPU3: omp_if.end: +; AMDGPU3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; AMDGPU3-SAME: () #[[ATTR1]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; AMDGPU3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; AMDGPU3: omp_if.then: +; AMDGPU3-NEXT: store i32 0, ptr @G, align 4 +; AMDGPU3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU3-NEXT: br label [[OMP_IF_END]] +; AMDGPU3: omp_if.end: +; AMDGPU3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; AMDGPU3-SAME: () #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) +; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU3: user_code.entry: +; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__kmpc_target_deinit() +; AMDGPU3-NEXT: ret void +; AMDGPU3: worker.exit: +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__1 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @p0() #[[ATTR11:[0-9]+]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; AMDGPU3-SAME: () #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) +; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU3: user_code.entry: +; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__kmpc_target_deinit() +; AMDGPU3-NEXT: ret void +; AMDGPU3: worker.exit: +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__4 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] +; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: noinline nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; AMDGPU3-SAME: () #[[ATTR6:[0-9]+]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; AMDGPU3-SAME: () #[[ATTR1]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: noinline nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; AMDGPU3-SAME: () #[[ATTR6]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; AMDGPU3-SAME: () #[[ATTR1]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; AMDGPU3-SAME: () #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) +; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU3: user_code.entry: +; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__kmpc_target_deinit() +; AMDGPU3-NEXT: ret void +; AMDGPU3: worker.exit: +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__6 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; AMDGPU3-SAME: () #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) +; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU3: user_code.entry: +; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__kmpc_target_deinit() +; AMDGPU3-NEXT: ret void +; AMDGPU3: worker.exit: +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__9 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; AMDGPU3-SAME: () #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) +; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU3: user_code.entry: +; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__kmpc_target_deinit() +; AMDGPU3-NEXT: ret void +; AMDGPU3: worker.exit: +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__12 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @p1() #[[ATTR11]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; AMDGPU3-SAME: () #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) +; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU3: user_code.entry: +; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__kmpc_target_deinit() +; AMDGPU3-NEXT: ret void +; AMDGPU3: worker.exit: +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__15 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] +; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: noinline nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; AMDGPU3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; AMDGPU3: if.then: +; AMDGPU3-NEXT: br label [[RETURN:%.*]] +; AMDGPU3: if.end: +; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]] +; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]] +; AMDGPU3-NEXT: br label [[RETURN]] +; AMDGPU3: return: +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; AMDGPU3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; AMDGPU3: if.then: +; AMDGPU3-NEXT: br label [[RETURN:%.*]] +; AMDGPU3: if.end: +; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]] +; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]] +; AMDGPU3-NEXT: br label [[RETURN]] +; AMDGPU3: return: +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; AMDGPU3-SAME: () #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) +; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU3: user_code.entry: +; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__kmpc_target_deinit() +; AMDGPU3-NEXT: ret void +; AMDGPU3: worker.exit: +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__16 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @weak_callee_empty() #[[ATTR9]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@weak_callee_empty +; AMDGPU3-SAME: () #[[ATTR1]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: noinline nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; AMDGPU3-SAME: () #[[ATTR6]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; AMDGPU3-SAME: () #[[ATTR1]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19 +; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @p0() #[[ATTR11]] +; AMDGPU3-NEXT: ret void +; +; +; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU3-NEXT: entry: +; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; AMDGPU3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU3-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; NVPTX2-SAME: () #[[ATTR0:[0-9]+]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) +; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX2: user_code.entry: +; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; NVPTX2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__kmpc_target_deinit() +; NVPTX2-NEXT: ret void +; NVPTX2: worker.exit: +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__ +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] +; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline nounwind +; NVPTX2-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; NVPTX2-SAME: () #[[ATTR1:[0-9]+]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; NVPTX2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; NVPTX2: omp_if.then: +; NVPTX2-NEXT: store i32 0, ptr @G, align 4 +; NVPTX2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX2-NEXT: br label [[OMP_IF_END]] +; NVPTX2: omp_if.end: +; NVPTX2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline nounwind +; NVPTX2-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; NVPTX2-SAME: () #[[ATTR1]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; NVPTX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; NVPTX2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; NVPTX2: omp_if.then: +; NVPTX2-NEXT: store i32 0, ptr @G, align 4 +; NVPTX2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; NVPTX2-NEXT: br label [[OMP_IF_END]] +; NVPTX2: omp_if.end: +; NVPTX2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; NVPTX2-SAME: () #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) +; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX2: user_code.entry: +; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__kmpc_target_deinit() +; NVPTX2-NEXT: ret void +; NVPTX2: worker.exit: +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__1 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__2 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @p0() #[[ATTR11:[0-9]+]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__3 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @p1() #[[ATTR11]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; NVPTX2-SAME: () #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) +; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX2: user_code.entry: +; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__kmpc_target_deinit() +; NVPTX2-NEXT: ret void +; NVPTX2: worker.exit: +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__4 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] +; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: noinline nounwind +; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; NVPTX2-SAME: () #[[ATTR6:[0-9]+]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline nounwind +; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; NVPTX2-SAME: () #[[ATTR1]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__5 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @p1() #[[ATTR11]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: noinline nounwind +; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; NVPTX2-SAME: () #[[ATTR6]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline nounwind +; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; NVPTX2-SAME: () #[[ATTR1]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; NVPTX2-SAME: () #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) +; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX2: user_code.entry: +; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__kmpc_target_deinit() +; NVPTX2-NEXT: ret void +; NVPTX2: worker.exit: +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__6 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__7 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @p0() #[[ATTR11]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__8 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @p1() #[[ATTR11]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; NVPTX2-SAME: () #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) +; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX2: user_code.entry: +; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__kmpc_target_deinit() +; NVPTX2-NEXT: ret void +; NVPTX2: worker.exit: +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__9 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__10 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @p0() #[[ATTR11]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__11 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @p1() #[[ATTR11]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; NVPTX2-SAME: () #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) +; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX2: user_code.entry: +; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__kmpc_target_deinit() +; NVPTX2-NEXT: ret void +; NVPTX2: worker.exit: +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__12 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__13 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @p0() #[[ATTR11]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__14 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @p1() #[[ATTR11]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; NVPTX2-SAME: () #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) +; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX2: user_code.entry: +; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__kmpc_target_deinit() +; NVPTX2-NEXT: ret void +; NVPTX2: worker.exit: +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__15 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] +; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: noinline nounwind +; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; NVPTX2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; NVPTX2: if.then: +; NVPTX2-NEXT: br label [[RETURN:%.*]] +; NVPTX2: if.end: +; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]] +; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]] +; NVPTX2-NEXT: br label [[RETURN]] +; NVPTX2: return: +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline nounwind +; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; NVPTX2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; NVPTX2: if.then: +; NVPTX2-NEXT: br label [[RETURN:%.*]] +; NVPTX2: if.end: +; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]] +; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]] +; NVPTX2-NEXT: br label [[RETURN]] +; NVPTX2: return: +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; NVPTX2-SAME: () #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) +; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX2: user_code.entry: +; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__kmpc_target_deinit() +; NVPTX2-NEXT: ret void +; NVPTX2: worker.exit: +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__16 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @weak_callee_empty() #[[ATTR9]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline nounwind +; NVPTX2-LABEL: define {{[^@]+}}@weak_callee_empty +; NVPTX2-SAME: () #[[ATTR1]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__17 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @p0() #[[ATTR11]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__18 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @p0() #[[ATTR11]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: noinline nounwind +; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; NVPTX2-SAME: () #[[ATTR6]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline nounwind +; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; NVPTX2-SAME: () #[[ATTR1]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__19 +; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @p0() #[[ATTR11]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX2: Function Attrs: convergent noinline norecurse nounwind +; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX2-NEXT: entry: +; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX2-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; NVPTX3-SAME: () #[[ATTR0:[0-9]+]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment) +; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX3: user_code.entry: +; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; NVPTX3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__kmpc_target_deinit() +; NVPTX3-NEXT: ret void +; NVPTX3: worker.exit: +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__ +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]] +; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline nounwind +; NVPTX3-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; NVPTX3-SAME: () #[[ATTR1:[0-9]+]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; NVPTX3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; NVPTX3: omp_if.then: +; NVPTX3-NEXT: store i32 0, ptr @G, align 4 +; NVPTX3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX3-NEXT: br label [[OMP_IF_END]] +; NVPTX3: omp_if.end: +; NVPTX3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline nounwind +; NVPTX3-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; NVPTX3-SAME: () #[[ATTR1]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; NVPTX3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; NVPTX3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; NVPTX3: omp_if.then: +; NVPTX3-NEXT: store i32 0, ptr @G, align 4 +; NVPTX3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) +; NVPTX3-NEXT: br label [[OMP_IF_END]] +; NVPTX3: omp_if.end: +; NVPTX3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; NVPTX3-SAME: () #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment) +; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX3: user_code.entry: +; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__kmpc_target_deinit() +; NVPTX3-NEXT: ret void +; NVPTX3: worker.exit: +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__1 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__2 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @p0() #[[ATTR11:[0-9]+]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__3 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @p1() #[[ATTR11]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; NVPTX3-SAME: () #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment) +; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX3: user_code.entry: +; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__kmpc_target_deinit() +; NVPTX3-NEXT: ret void +; NVPTX3: worker.exit: +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__4 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]] +; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]] +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: noinline nounwind +; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; NVPTX3-SAME: () #[[ATTR6:[0-9]+]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline nounwind +; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; NVPTX3-SAME: () #[[ATTR1]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__5 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @p1() #[[ATTR11]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: noinline nounwind +; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; NVPTX3-SAME: () #[[ATTR6]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline nounwind +; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; NVPTX3-SAME: () #[[ATTR1]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; NVPTX3-SAME: () #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment) +; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX3: user_code.entry: +; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__kmpc_target_deinit() +; NVPTX3-NEXT: ret void +; NVPTX3: worker.exit: +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__6 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]] +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__7 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @p0() #[[ATTR11]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__8 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @p1() #[[ATTR11]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; NVPTX3-SAME: () #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment) +; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX3: user_code.entry: +; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__kmpc_target_deinit() +; NVPTX3-NEXT: ret void +; NVPTX3: worker.exit: +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__9 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__10 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @p0() #[[ATTR11]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__11 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @p1() #[[ATTR11]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; NVPTX3-SAME: () #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment) +; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX3: user_code.entry: +; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__kmpc_target_deinit() +; NVPTX3-NEXT: ret void +; NVPTX3: worker.exit: +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__12 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]] +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__13 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @p0() #[[ATTR11]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__14 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @p1() #[[ATTR11]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; NVPTX3-SAME: () #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment) +; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX3: user_code.entry: +; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__kmpc_target_deinit() +; NVPTX3-NEXT: ret void +; NVPTX3: worker.exit: +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__15 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]] +; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: noinline nounwind +; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; NVPTX3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; NVPTX3: if.then: +; NVPTX3-NEXT: br label [[RETURN:%.*]] +; NVPTX3: if.end: +; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]] +; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]] +; NVPTX3-NEXT: br label [[RETURN]] +; NVPTX3: return: +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline nounwind +; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +; NVPTX3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; NVPTX3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; NVPTX3: if.then: +; NVPTX3-NEXT: br label [[RETURN:%.*]] +; NVPTX3: if.end: +; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]] +; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]] +; NVPTX3-NEXT: br label [[RETURN]] +; NVPTX3: return: +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; NVPTX3-SAME: () #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment) +; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX3: user_code.entry: +; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__kmpc_target_deinit() +; NVPTX3-NEXT: ret void +; NVPTX3: worker.exit: +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__16 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @weak_callee_empty() #[[ATTR9]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline nounwind +; NVPTX3-LABEL: define {{[^@]+}}@weak_callee_empty +; NVPTX3-SAME: () #[[ATTR1]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__17 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @p0() #[[ATTR11]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__18 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @p0() #[[ATTR11]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: noinline nounwind +; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; NVPTX3-SAME: () #[[ATTR6]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline nounwind +; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; NVPTX3-SAME: () #[[ATTR1]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__19 +; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @p0() #[[ATTR11]] +; NVPTX3-NEXT: ret void +; +; +; NVPTX3: Function Attrs: convergent noinline norecurse nounwind +; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX3-NEXT: entry: +; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 +; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +; NVPTX3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX3-NEXT: ret void +; +;. +; AMDGPU1: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU1: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU1: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU1: attributes #[[ATTR3]] = { nounwind } +; AMDGPU1: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU1: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } +; AMDGPU1: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU1: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU1: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU1: attributes #[[ATTR9]] = { convergent nounwind } +; AMDGPU1: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } +; AMDGPU1: attributes #[[ATTR11]] = { convergent } +;. +; NVPTX1: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX1: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX1: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX1: attributes #[[ATTR3]] = { nounwind } +; NVPTX1: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX1: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } +; NVPTX1: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX1: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX1: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX1: attributes #[[ATTR9]] = { convergent nounwind } +; NVPTX1: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } +; NVPTX1: attributes #[[ATTR11]] = { convergent } +;. +; AMDGPU2: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU2: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU2: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU2: attributes #[[ATTR3]] = { nounwind } +; AMDGPU2: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU2: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } +; AMDGPU2: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU2: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU2: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU2: attributes #[[ATTR9]] = { convergent nounwind } +; AMDGPU2: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } +; AMDGPU2: attributes #[[ATTR11]] = { convergent } +;. +; AMDGPU3: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU3: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU3: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU3: attributes #[[ATTR3]] = { nounwind } +; AMDGPU3: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU3: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } +; AMDGPU3: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU3: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU3: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; AMDGPU3: attributes #[[ATTR9]] = { convergent nounwind } +; AMDGPU3: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } +; AMDGPU3: attributes #[[ATTR11]] = { convergent } +;. +; NVPTX2: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX2: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX2: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX2: attributes #[[ATTR3]] = { nounwind } +; NVPTX2: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX2: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } +; NVPTX2: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX2: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX2: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX2: attributes #[[ATTR9]] = { convergent nounwind } +; NVPTX2: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } +; NVPTX2: attributes #[[ATTR11]] = { convergent } +;. +; NVPTX3: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX3: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX3: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX3: attributes #[[ATTR3]] = { nounwind } +; NVPTX3: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX3: attributes #[[ATTR5:[0-9]+]] = { alwaysinline } +; NVPTX3: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX3: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX3: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; NVPTX3: attributes #[[ATTR9]] = { convergent nounwind } +; NVPTX3: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" } +; NVPTX3: attributes #[[ATTR11]] = { convergent } +;. +; AMDGPU1: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} +; AMDGPU1: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} +; AMDGPU1: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} +; AMDGPU1: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} +; AMDGPU1: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} +; AMDGPU1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} +; AMDGPU1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} +; AMDGPU1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} +; AMDGPU1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} +; AMDGPU1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} +; AMDGPU1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} +; AMDGPU1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} +; AMDGPU1: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} +; AMDGPU1: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} +; AMDGPU1: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} +; AMDGPU1: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} +; AMDGPU1: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU1: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU1: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +;. +; NVPTX1: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} +; NVPTX1: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} +; NVPTX1: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} +; NVPTX1: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} +; NVPTX1: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} +; NVPTX1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} +; NVPTX1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} +; NVPTX1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} +; NVPTX1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} +; NVPTX1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} +; NVPTX1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} +; NVPTX1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} +; NVPTX1: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} +; NVPTX1: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} +; NVPTX1: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} +; NVPTX1: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} +; NVPTX1: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX1: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX1: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +;. +; AMDGPU2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} +; AMDGPU2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} +; AMDGPU2: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} +; AMDGPU2: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} +; AMDGPU2: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} +; AMDGPU2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} +; AMDGPU2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} +; AMDGPU2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} +; AMDGPU2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} +; AMDGPU2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} +; AMDGPU2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} +; AMDGPU2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} +; AMDGPU2: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} +; AMDGPU2: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} +; AMDGPU2: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} +; AMDGPU2: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} +; AMDGPU2: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU2: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU2: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +;. +; AMDGPU3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} +; AMDGPU3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} +; AMDGPU3: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} +; AMDGPU3: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} +; AMDGPU3: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} +; AMDGPU3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} +; AMDGPU3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} +; AMDGPU3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} +; AMDGPU3: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} +; AMDGPU3: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} +; AMDGPU3: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} +; AMDGPU3: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} +; AMDGPU3: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} +; AMDGPU3: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} +; AMDGPU3: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} +; AMDGPU3: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} +; AMDGPU3: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; AMDGPU3: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; AMDGPU3: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +;. +; NVPTX2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} +; NVPTX2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} +; NVPTX2: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} +; NVPTX2: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} +; NVPTX2: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} +; NVPTX2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} +; NVPTX2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} +; NVPTX2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} +; NVPTX2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} +; NVPTX2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} +; NVPTX2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} +; NVPTX2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} +; NVPTX2: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} +; NVPTX2: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} +; NVPTX2: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} +; NVPTX2: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} +; NVPTX2: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX2: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX2: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +;. +; NVPTX3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2} +; NVPTX3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4} +; NVPTX3: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0} +; NVPTX3: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3} +; NVPTX3: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5} +; NVPTX3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6} +; NVPTX3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7} +; NVPTX3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1} +; NVPTX3: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1} +; NVPTX3: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1} +; NVPTX3: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1} +; NVPTX3: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1} +; NVPTX3: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1} +; NVPTX3: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1} +; NVPTX3: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1} +; NVPTX3: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1} +; NVPTX3: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; NVPTX3: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; NVPTX3: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +;. diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll --- a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll @@ -36,6 +36,8 @@ ;; } %struct.ident_t = type { i32, i32, i32, i32, ptr } +%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } +%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @@ -43,24 +45,24 @@ @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8 @4 = private unnamed_addr constant [114 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1 @5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8 -@__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1 @6 = private unnamed_addr constant [116 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1 @7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8 @8 = private unnamed_addr constant [85 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_no_fallback;20;1;;\00", align 1 @9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8 @10 = private unnamed_addr constant [117 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1 @11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8 -@__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1 @12 = private unnamed_addr constant [73 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;known;4;1;;\00", align 1 @13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8 @G = external global i32 -@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata" +@__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null } +@__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null } + ; Function Attrs: convergent norecurse nounwind define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 { entry: %captured_vars_addrs.i.i = alloca [0 x ptr], align 8 - %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3, !dbg !18 + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment) #3, !dbg !18 %exec_user_code = icmp eq i32 %0, -1, !dbg !18 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18 @@ -75,12 +77,12 @@ call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23 call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26 call void @unknown() #6, !dbg !27 - call void @__kmpc_target_deinit(ptr nonnull @5, i8 1) #3, !dbg !28 + call void @__kmpc_target_deinit() #3, !dbg !28 br label %common.ret } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(ptr, i8, i1) { +define weak i32 @__kmpc_target_init(ptr) { ret i32 0 } @@ -99,13 +101,13 @@ ; Function Attrs: nounwind declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3 -declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr +declare void @__kmpc_target_deinit() local_unnamed_addr ; Function Attrs: norecurse nounwind define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 { entry: %captured_vars_addrs.i2.i = alloca [0 x ptr], align 8 - %0 = call i32 @__kmpc_target_init(ptr nonnull @7, i8 1, i1 true) #3, !dbg !33 + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment) #3, !dbg !33 %exec_user_code = icmp eq i32 %0, -1, !dbg !33 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33 @@ -128,7 +130,7 @@ call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45 call void @no_openmp() call void @no_parallelism() - call void @__kmpc_target_deinit(ptr nonnull @11, i8 1) #3, !dbg !46 + call void @__kmpc_target_deinit() #3, !dbg !46 br label %common.ret } diff --git a/llvm/test/Transforms/OpenMP/deduplication_target.ll b/llvm/test/Transforms/OpenMP/deduplication_target.ll --- a/llvm/test/Transforms/OpenMP/deduplication_target.ll +++ b/llvm/test/Transforms/OpenMP/deduplication_target.ll @@ -5,12 +5,13 @@ target triple = "nvptx64" %struct.ident_t = type { i32, i32, i32, i32, ptr } +%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } +%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, ptr @0 }, align 8 -@__omp_offloading_50_a3e09bf8_foo_l2_exec_mode = weak constant i8 0 -@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_50_a3e09bf8_foo_l2_exec_mode], section "llvm.metadata" +@__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null } declare void @use(i32) @@ -19,37 +20,37 @@ ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +; CHECK-NEXT: call void @__kmpc_target_deinit() ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; entry: %captured_vars_addrs = alloca [0 x ptr], align 8 - %0 = call i32 @__kmpc_target_init(ptr @1, i8 2, i1 false) + %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(ptr @2) %2 = call i32 @__kmpc_global_thread_num(ptr @2) - call void @__kmpc_target_deinit(ptr @1, i8 2) + call void @__kmpc_target_deinit() ret void worker.exit: ; preds = %entry ret void } -declare i32 @__kmpc_target_init(ptr, i8, i1) +declare i32 @__kmpc_target_init(ptr) declare i32 @__kmpc_global_thread_num(ptr) #1 -declare void @__kmpc_target_deinit(ptr, i8) +declare void @__kmpc_target_deinit() attributes #0 = { convergent noinline norecurse nounwind "kernel" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } attributes #1 = { nounwind } diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -2,26 +2,27 @@ ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s target triple = "nvptx64" -%struct.ident_t = type { i32, i32, i32, i32, ptr } - -@kernel0_exec_mode = weak constant i8 1 +%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } +%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @G = external global i32 + +@kernel0_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null } +@kernel1_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null } +@kernel2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null } + ;. -; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32 -; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 -; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 -; CHECK: @[[KERNEL0_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 -; CHECK: @[[KERNEL1_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 -; CHECK: @[[KERNEL2_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK: @[[KERNEL0_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null } +; CHECK: @[[KERNEL1_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null } +; CHECK: @[[KERNEL2_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null } ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8 ;. define weak void @kernel0() "kernel" #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -31,24 +32,21 @@ ; CHECK-NEXT: call void @helper0() #[[ATTR1:[0-9]+]] ; CHECK-NEXT: call void @helper1() #[[ATTR1]] ; CHECK-NEXT: call void @helper2() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) +; CHECK-NEXT: call void @__kmpc_target_deinit() ; CHECK-NEXT: ret void ; - - %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) + %i = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment) call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_target_deinit(ptr null, i8 1) + call void @__kmpc_target_deinit() ret void } -@kernel1_exec_mode = weak constant i8 1 - define weak void @kernel1() "kernel" #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -56,24 +54,21 @@ ; CHECK-NEXT: ret void ; CHECK: main.thread.user_code: ; CHECK-NEXT: call void @helper1() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) +; CHECK-NEXT: call void @__kmpc_target_deinit() ; CHECK-NEXT: ret void ; - - %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false) + %i = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment) call void @helper1() - call void @__kmpc_target_deinit(ptr null, i8 1) + call void @__kmpc_target_deinit() ret void } -@kernel2_exec_mode = weak constant i8 1 - define weak void @kernel2() "kernel" #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: @@ -84,12 +79,12 @@ ; CHECK-NEXT: call void @helper1() #[[ATTR1]] ; CHECK-NEXT: call void @helper2() #[[ATTR1]] ; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2) +; CHECK-NEXT: call void @__kmpc_target_deinit() ; CHECK-NEXT: ret void ; entry: %captured_vars_addrs = alloca [0 x ptr], align 8 - %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 true) + %i = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment) %exec_user_code = icmp eq i32 %i, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -102,7 +97,7 @@ call void @helper1() call void @helper2() call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0) - call void @__kmpc_target_deinit(ptr null, i8 1) + call void @__kmpc_target_deinit() ret void } @@ -198,8 +193,8 @@ ret i32 %ret } declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy() -declare i32 @__kmpc_target_init(ptr, i8, i1 zeroext) #1 -declare void @__kmpc_target_deinit(ptr nocapture readnone, i8) #1 +declare i32 @__kmpc_target_init(ptr) #1 +declare void @__kmpc_target_deinit() #1 declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) declare i32 @__kmpc_global_thread_num(ptr) @@ -214,7 +209,6 @@ !2 = !{ptr @kernel0, !"kernel", i32 1} !3 = !{ptr @kernel1, !"kernel", i32 1} !4 = !{ptr @kernel2, !"kernel", i32 1} -; ;. ; CHECK: attributes #[[ATTR0]] = { "kernel" "omp_target_num_teams"="777" "omp_target_thread_limit"="666" } ; CHECK: attributes #[[ATTR1]] = { nounwind } diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll @@ -2,28 +2,22 @@ ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s target triple = "nvptx64" -%struct.ident_t = type { i32, i32, i32, i32, ptr } - -@kernel0_exec_mode = weak constant i8 1 - @G = external global i32 + ;. -; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32 -; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 -; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ;. define weak void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null) ; CHECK-NEXT: call void @helper0() ; CHECK-NEXT: call void @helper1() ; CHECK-NEXT: call void @helper2() ; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 true) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false) + %i = call i32 @__kmpc_target_init(ptr null) call void @helper0() call void @helper1() call void @helper2() @@ -31,24 +25,20 @@ ret void } -@kernel1_exec_mode = weak constant i8 1 - define weak void @kernel1() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null) ; CHECK-NEXT: call void @helper1() -; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit() ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false) + %i = call i32 @__kmpc_target_init(ptr null) call void @helper1() - call void @__kmpc_target_deinit(ptr null, i1 false) + call void @__kmpc_target_deinit() ret void } -@kernel2_exec_mode = weak constant i8 1 - define weak void @kernel2() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 ; CHECK-SAME: () #[[ATTR0]] { @@ -56,14 +46,14 @@ ; CHECK-NEXT: call void @helper0() ; CHECK-NEXT: call void @helper1() ; CHECK-NEXT: call void @helper2() -; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit() ; CHECK-NEXT: ret void ; %i = call i32 @__kmpc_target_init(ptr null, i1 false, i1 false) call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_target_deinit(ptr null, i1 false) + call void @__kmpc_target_deinit() ret void } @@ -112,8 +102,8 @@ } declare i32 @__kmpc_get_hardware_num_threads_in_block() -declare i32 @__kmpc_target_init(ptr, i1 zeroext, i1 zeroext) #1 -declare void @__kmpc_target_deinit(ptr nocapture readnone, i1 zeroext) #1 +declare i32 @__kmpc_target_init(ptr) #1 +declare void @__kmpc_target_deinit() #1 !llvm.module.flags = !{!0, !1} diff --git a/llvm/test/Transforms/OpenMP/global_constructor.ll b/llvm/test/Transforms/OpenMP/global_constructor.ll --- a/llvm/test/Transforms/OpenMP/global_constructor.ll +++ b/llvm/test/Transforms/OpenMP/global_constructor.ll @@ -2,15 +2,17 @@ ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s %struct.ident_t = type { i32, i32, i32, i32, ptr } +%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } +%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 } @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 @_ZL6Device = internal global double 0.000000e+00, align 8 -@__omp_offloading_fd02_85283c04_main_l11_exec_mode = weak constant i8 0 +@__omp_offloading_fd02_85283c04_main_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null } define weak void @__omp_offloading_fd02_85283c04_main_l11(ptr nonnull align 8 dereferenceable(8) %X) local_unnamed_addr "kernel" { entry: - %0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 2, i1 false) #0 + %0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_ke