diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2036,7 +2036,8 @@ UndefValue::get(Int8Ty), F->getName() + ".ID"); for (Use *U : ToBeReplacedStateMachineUses) - U->set(ConstantExpr::getBitCast(ID, U->get()->getType())); + U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast( + ID, U->get()->getType())); ++NumOpenMPParallelRegionsReplacedInGPUStateMachine; @@ -3422,10 +3423,14 @@ IsWorker->setDebugLoc(DLoc); BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, InitBB); + Module &M = *Kernel->getParent(); + // Create local storage for the work function pointer. + const DataLayout &DL = M.getDataLayout(); Type *VoidPtrTy = Type::getInt8PtrTy(Ctx); - AllocaInst *WorkFnAI = new AllocaInst(VoidPtrTy, 0, "worker.work_fn.addr", - &Kernel->getEntryBlock().front()); + Instruction *WorkFnAI = + new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr, + "worker.work_fn.addr", &Kernel->getEntryBlock().front()); WorkFnAI->setDebugLoc(DLoc); auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); @@ -3438,13 +3443,23 @@ Value *Ident = KernelInitCB->getArgOperand(0); Value *GTid = KernelInitCB; - Module &M = *Kernel->getParent(); FunctionCallee BarrierFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_barrier_simple_spmd); CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB) ->setDebugLoc(DLoc); + if (WorkFnAI->getType()->getPointerAddressSpace() != + (unsigned int)AddressSpace::Generic) { + WorkFnAI = new AddrSpaceCastInst( + WorkFnAI, + PointerType::getWithSamePointeeType( + cast<PointerType>(WorkFnAI->getType()), + (unsigned int)AddressSpace::Generic), + WorkFnAI->getName() + ".generic", StateMachineBeginBB); + WorkFnAI->setDebugLoc(DLoc); + } + FunctionCallee KernelParallelFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_kernel_parallel); diff --git 
a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs -; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s -; RUN: opt -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=CHECK-DISABLED +; RUN: opt --mtriple=amdgcn-amd-amdhsa -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=ALL,AMDGPU +; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=ALL,NVPTX +; RUN: opt --mtriple=amdgcn-amd-amdhsa -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=CHECK-DISABLED +; RUN: opt --mtriple=nvptx64-- -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=CHECK-DISABLED ;; void p0(void); ;; void p1(void); @@ -117,7 +119,7 @@ ;; { weak_callee_empty(); } ;; } -target triple = "nvptx64" +;; target triple = "nvptx64" %struct.ident_t = type { i32, i32, i32, i32, i8* } @@ -841,967 +843,5782 @@ !16 = !{i32 1, !"wchar_size", i32 4} !17 = !{i32 7, !"openmp", i32 50} !18 = !{i32 7, !"openmp-device", i32 50} -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 -; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 true) -; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; CHECK-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) -; CHECK-NEXT: ret void -; CHECK: worker.exit: -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -; CHECK-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7:[0-9]+]] -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8:[0-9]+]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline nounwind -; CHECK-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR3]] -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; CHECK: omp_if.then: -; CHECK-NEXT: store i32 0, i32* @G, align 4 -; CHECK-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 
[[TMP0]]) #[[ATTR3]] -; CHECK-NEXT: br label [[OMP_IF_END]] -; CHECK: omp_if.end: -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline nounwind -; CHECK-LABEL: define {{[^@]+}}@no_parallel_region_in_here -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -; CHECK: omp_if.then: -; CHECK-NEXT: store i32 0, i32* @G, align 4 -; CHECK-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) -; CHECK-NEXT: br label [[OMP_IF_END]] -; CHECK: omp_if.end: -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) -; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; CHECK: worker_state_machine.begin: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: 
[[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null -; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; CHECK: worker_state_machine.finished: -; CHECK-NEXT: ret void -; CHECK: worker_state_machine.is_active.check: -; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; CHECK: worker_state_machine.parallel_region.check: -; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*) -; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; CHECK: worker_state_machine.parallel_region.execute: -; CHECK-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; CHECK: worker_state_machine.parallel_region.check1: -; CHECK-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] -; CHECK: worker_state_machine.parallel_region.execute2: -; CHECK-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; CHECK: worker_state_machine.parallel_region.check3: -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; CHECK: worker_state_machine.parallel_region.end: -; CHECK-NEXT: call void @__kmpc_kernel_end_parallel() -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; CHECK: worker_state_machine.done.barrier: -; 
CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; CHECK: thread.user_code.check: -; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) -; CHECK-NEXT: ret void -; CHECK: worker.exit: -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1 -; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 
[[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR9:[0-9]+]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper -; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; 
CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__3 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR9]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) -; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label 
[[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; CHECK: worker_state_machine.begin: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null -; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; CHECK: worker_state_machine.finished: -; CHECK-NEXT: ret void -; CHECK: worker_state_machine.is_active.check: -; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; CHECK: worker_state_machine.parallel_region.check: -; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper -; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; CHECK: worker_state_machine.parallel_region.execute: -; CHECK-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; CHECK: worker_state_machine.parallel_region.check1: -; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) -; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] -; CHECK: 
worker_state_machine.parallel_region.execute2: -; CHECK-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; CHECK: worker_state_machine.parallel_region.check3: -; CHECK-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]] -; CHECK: worker_state_machine.parallel_region.execute5: -; CHECK-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; CHECK: worker_state_machine.parallel_region.check6: -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; CHECK: worker_state_machine.parallel_region.end: -; CHECK-NEXT: call void @__kmpc_kernel_end_parallel() -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; CHECK: worker_state_machine.done.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; CHECK: thread.user_code.check: -; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) -; CHECK-NEXT: ret void -; CHECK: worker.exit: -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define 
{{[^@]+}}@__omp_outlined__4 -; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]] -; CHECK-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR7]] -; CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR7]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline nounwind -; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast 
(void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline nounwind -; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__5 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR9]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper -; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; CHECK-NEXT: store i32 
[[TMP1]], i32* [[DOTADDR1]], align 4 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline nounwind -; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline nounwind -; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: entry: -; 
CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) -; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; CHECK: worker_state_machine.begin: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null -; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; CHECK: worker_state_machine.finished: -; CHECK-NEXT: ret void -; CHECK: worker_state_machine.is_active.check: -; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; CHECK: worker_state_machine.parallel_region.check: -; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) -; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; CHECK: worker_state_machine.parallel_region.execute: -; CHECK-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; 
CHECK: worker_state_machine.parallel_region.check1: -; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__8_wrapper.ID to void (i16, i32)*) -; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; CHECK: worker_state_machine.parallel_region.execute2: -; CHECK-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; CHECK: worker_state_machine.parallel_region.fallback.execute: -; CHECK-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; CHECK: worker_state_machine.parallel_region.end: -; CHECK-NEXT: call void @__kmpc_kernel_end_parallel() -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; CHECK: worker_state_machine.done.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; CHECK: thread.user_code.check: -; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) -; CHECK-NEXT: ret void -; CHECK: worker.exit: -; CHECK-NEXT: ret void -; 
-; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__6 -; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR9]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__7 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 
8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR9]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper -; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__8 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR9]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper -; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; CHECK-NEXT: store i16 [[TMP0]], 
i16* [[DOTADDR]], align 2 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) -; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; CHECK: worker_state_machine.begin: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null -; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; CHECK: worker_state_machine.finished: -; CHECK-NEXT: ret void -; CHECK: worker_state_machine.is_active.check: -; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; CHECK: worker_state_machine.parallel_region.check: -; CHECK-NEXT: 
[[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__10_wrapper.ID to void (i16, i32)*) -; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; CHECK: worker_state_machine.parallel_region.execute: -; CHECK-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; CHECK: worker_state_machine.parallel_region.check1: -; CHECK-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] -; CHECK: worker_state_machine.parallel_region.execute2: -; CHECK-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; CHECK: worker_state_machine.parallel_region.check3: -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; CHECK: worker_state_machine.parallel_region.end: -; CHECK-NEXT: call void @__kmpc_kernel_end_parallel() -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; CHECK: worker_state_machine.done.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; CHECK: thread.user_code.check: -; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef 
nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) -; CHECK-NEXT: ret void -; CHECK: worker.exit: -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__9 -; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define 
{{[^@]+}}@__omp_outlined__10 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR9]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper -; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__11 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR9]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper -; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, 
align 2 -; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) -; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; CHECK: worker_state_machine.begin: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null -; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; CHECK: worker_state_machine.finished: -; CHECK-NEXT: ret void -; CHECK: 
worker_state_machine.is_active.check: -; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; CHECK: worker_state_machine.parallel_region.check: -; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__13_wrapper.ID to void (i16, i32)*) -; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] -; CHECK: worker_state_machine.parallel_region.execute: -; CHECK-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; CHECK: worker_state_machine.parallel_region.check1: -; CHECK-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] -; CHECK: worker_state_machine.parallel_region.execute2: -; CHECK-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; CHECK: worker_state_machine.parallel_region.check3: -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; CHECK: worker_state_machine.parallel_region.end: -; CHECK-NEXT: call void @__kmpc_kernel_end_parallel() -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; CHECK: worker_state_machine.done.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; CHECK: thread.user_code.check: -; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 
#[[ATTR3]] -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) -; CHECK-NEXT: ret void -; CHECK: worker.exit: -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__12 -; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, 
i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__13 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR9]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper -; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__14 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR9]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: 
convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper -; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) -; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; CHECK: worker_state_machine.begin: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = 
icmp eq i8* [[WORKER_WORK_FN]], null -; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; CHECK: worker_state_machine.finished: -; CHECK-NEXT: ret void -; CHECK: worker_state_machine.is_active.check: -; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; CHECK: worker_state_machine.parallel_region.check: -; CHECK-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper -; CHECK-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] -; CHECK: worker_state_machine.parallel_region.execute: -; CHECK-NEXT: call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; CHECK: worker_state_machine.parallel_region.fallback.execute: -; CHECK-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] -; CHECK: worker_state_machine.parallel_region.end: -; CHECK-NEXT: call void @__kmpc_kernel_end_parallel() -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; CHECK: worker_state_machine.done.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; CHECK: thread.user_code.check: -; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; CHECK-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull 
readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) -; CHECK-NEXT: ret void -; CHECK: worker.exit: -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__15 -; CHECK-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR9]] -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR7]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline nounwind -; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized -; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br label [[RETURN:%.*]] -; CHECK: if.end: -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR7]] -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR7]] -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: ret void -; -; -; 
CHECK: Function Attrs: convergent noinline nounwind -; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after -; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br label [[RETURN:%.*]] -; CHECK: if.end: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR9]] -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR9]] -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) -; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] -; CHECK: worker_state_machine.begin: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, 
i8** [[WORKER_WORK_FN_ADDR]], align 8 -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* -; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null -; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] -; CHECK: worker_state_machine.finished: -; CHECK-NEXT: ret void -; CHECK: worker_state_machine.is_active.check: -; CHECK-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] -; CHECK: worker_state_machine.parallel_region.fallback.execute: -; CHECK-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] -; CHECK: worker_state_machine.parallel_region.end: -; CHECK-NEXT: call void @__kmpc_kernel_end_parallel() -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] -; CHECK: worker_state_machine.done.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] -; CHECK: thread.user_code.check: -; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; CHECK-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) -; CHECK-NEXT: ret void -; CHECK: worker.exit: -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline 
norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__16 -; CHECK-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @weak_callee_empty() #[[ATTR7]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline nounwind -; CHECK-LABEL: define {{[^@]+}}@weak_callee_empty -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__17 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR9]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper -; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* 
[[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__18 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR9]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper -; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline nounwind -; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, 
i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline nounwind -; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__19 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR9]] -; CHECK-NEXT: ret void -; -; -; CHECK: Function Attrs: convergent noinline norecurse nounwind -; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper -; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -; CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -; CHECK-NEXT: store i16 
[[TMP0]], i16* [[DOTADDR]], align 2 -; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; CHECK-NEXT: ret void +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; ALL-SAME: () #[[ATTR0:[0-9]+]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; ALL-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__ +; ALL-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @no_parallel_region_in_here.internalized() 
#[[ATTR8:[0-9]+]] +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; ALL-SAME: () #[[ATTR1:[0-9]+]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; ALL-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; ALL-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; ALL: omp_if.then: +; ALL-NEXT: store i32 0, i32* @G, align 4 +; ALL-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; ALL-NEXT: br label [[OMP_IF_END]] +; ALL: omp_if.end: +; ALL-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; ALL-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; ALL-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; ALL: omp_if.then: +; ALL-NEXT: store i32 0, i32* @G, align 4 +; ALL-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; ALL-NEXT: br label [[OMP_IF_END]] +; ALL: omp_if.end: +; ALL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: 
entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: 
worker_state_machine.parallel_region.check1: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__1 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; 
ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__2 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10:[0-9]+]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 
[[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__3 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define 
{{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]] +; ALL: worker_state_machine.parallel_region.execute5: +; ALL-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check6: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 
dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__4 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] +; ALL-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; 
ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__5 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: 
[[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) 
+; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; 
ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__8_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.fallback.execute: +; ALL-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call 
void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__6 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__7 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone 
[[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__8 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* 
[[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: 
[[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__10_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) 
[[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__9 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__10 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* 
noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__11 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; 
ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: 
[[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__13_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) 
[[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__12 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__13 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* 
noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__14 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; 
ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: 
worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.fallback.execute: +; ALL-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__15 +; ALL-SAME: (i32* noalias nocapture nofree 
nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; ALL-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; ALL-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; ALL: if.then: +; ALL-NEXT: br label [[RETURN:%.*]] +; ALL: if.end: +; ALL-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] +; ALL-NEXT: br label [[RETURN]] +; ALL: return: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; ALL-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; ALL-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; ALL: if.then: +; ALL-NEXT: br label 
[[RETURN:%.*]] +; ALL: if.end: +; ALL-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] +; ALL-NEXT: br label [[RETURN]] +; ALL: return: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label 
[[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.fallback.execute: +; ALL-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__16 +; ALL-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @weak_callee_empty() #[[ATTR8]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@weak_callee_empty +; 
ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__17 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__18 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], 
i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 
-1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__19 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 
[[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__ +; ALL-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; ALL-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; ALL-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; ALL: omp_if.then: +; ALL-NEXT: store i32 0, i32* @G, align 4 +; ALL-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; ALL-NEXT: br label [[OMP_IF_END]] +; ALL: 
omp_if.end: +; ALL-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; ALL-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; ALL-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; ALL: omp_if.then: +; ALL-NEXT: store i32 0, i32* @G, align 4 +; ALL-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; ALL-NEXT: br label [[OMP_IF_END]] +; ALL: omp_if.end: +; ALL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* 
[[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; 
ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__1 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: call void 
@no_parallel_region_in_here.internalized() #[[ATTR8]] +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__2 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__3 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; 
ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** 
[[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]] +; ALL: worker_state_machine.parallel_region.execute5: +; 
ALL-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check6: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__4 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* 
[[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] +; ALL-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: 
[[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__5 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) 
#[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: 
[[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__8_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.fallback.execute: +; 
ALL-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__6 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; 
ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__7 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void 
@__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__8 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] 
= icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__10_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__9 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__10 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* 
[[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__11 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) 
+; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__13_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br 
label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__12 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__13 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* 
[[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__14 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], 
i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.fallback.execute: +; ALL-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label 
[[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__15 +; ALL-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; ALL-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: store i32 [[A]], i32* 
[[A_ADDR]], align 4 +; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; ALL-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; ALL: if.then: +; ALL-NEXT: br label [[RETURN:%.*]] +; ALL: if.end: +; ALL-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] +; ALL-NEXT: br label [[RETURN]] +; ALL: return: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; ALL-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; ALL-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; ALL: if.then: +; ALL-NEXT: br label [[RETURN:%.*]] +; ALL: if.end: +; ALL-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] +; ALL-NEXT: br label [[RETURN]] +; ALL: return: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.fallback.execute: +; ALL-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__16 +; ALL-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @weak_callee_empty() #[[ATTR8]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@weak_callee_empty +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__17 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = 
alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__18 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: 
entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__19 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 
[[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; AMDGPU-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ +; AMDGPU-SAME: (i32* noalias nocapture nofree 
nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]] +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; AMDGPU: omp_if.then: +; AMDGPU-NEXT: store i32 0, i32* @G, align 4 +; AMDGPU-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: br label [[OMP_IF_END]] +; AMDGPU: omp_if.end: +; AMDGPU-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; AMDGPU: omp_if.then: +; 
AMDGPU-NEXT: store i32 0, i32* @G, align 4 +; AMDGPU-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[OMP_IF_END]] +; AMDGPU: omp_if.end: +; AMDGPU-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; AMDGPU: worker_state_machine.begin: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; AMDGPU: worker_state_machine.finished: +; AMDGPU-NEXT: ret void +; AMDGPU: worker_state_machine.is_active.check: +; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; AMDGPU: worker_state_machine.parallel_region.check: +; 
AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*) +; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; AMDGPU: worker_state_machine.parallel_region.execute: +; AMDGPU-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; AMDGPU: worker_state_machine.parallel_region.check1: +; AMDGPU-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; AMDGPU: worker_state_machine.parallel_region.execute2: +; AMDGPU-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU: worker_state_machine.parallel_region.check3: +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU: worker_state_machine.parallel_region.end: +; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU: worker_state_machine.done.barrier: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU: thread.user_code.check: +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) 
[[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 +; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 
noundef 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 +; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR10:[0-9]+]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 +; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: call void @p1() #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define 
{{[^@]+}}@__omp_outlined__3_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; AMDGPU: worker_state_machine.begin: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: 
br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; AMDGPU: worker_state_machine.finished: +; AMDGPU-NEXT: ret void +; AMDGPU: worker_state_machine.is_active.check: +; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; AMDGPU: worker_state_machine.parallel_region.check: +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper +; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; AMDGPU: worker_state_machine.parallel_region.execute: +; AMDGPU-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; AMDGPU: worker_state_machine.parallel_region.check1: +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) +; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; AMDGPU: worker_state_machine.parallel_region.execute2: +; AMDGPU-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU: worker_state_machine.parallel_region.check3: +; AMDGPU-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]] +; AMDGPU: worker_state_machine.parallel_region.execute5: +; AMDGPU-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU: 
worker_state_machine.parallel_region.check6: +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU: worker_state_machine.parallel_region.end: +; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU: worker_state_machine.done.barrier: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU: thread.user_code.check: +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 +; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: 
call void @unknown_no_openmp() #[[ATTR9]] +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] +; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; AMDGPU-NEXT: 
[[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 +; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: call void @p1() #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; AMDGPU: 
worker_state_machine.begin: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; AMDGPU: worker_state_machine.finished: +; AMDGPU-NEXT: ret void +; AMDGPU: worker_state_machine.is_active.check: +; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; AMDGPU: worker_state_machine.parallel_region.check: +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) +; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; AMDGPU: worker_state_machine.parallel_region.execute: +; AMDGPU-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; AMDGPU: worker_state_machine.parallel_region.check1: +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__8_wrapper.ID to void (i16, i32)*) +; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; AMDGPU: worker_state_machine.parallel_region.execute2: +; 
AMDGPU-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU: worker_state_machine.parallel_region.fallback.execute: +; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU: worker_state_machine.parallel_region.end: +; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU: worker_state_machine.done.barrier: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU: thread.user_code.check: +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 +; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; 
AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] +; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 +; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; 
AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 +; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: call void @p1() #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define 
{{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; AMDGPU: worker_state_machine.begin: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; AMDGPU: worker_state_machine.finished: +; AMDGPU-NEXT: ret void +; AMDGPU: worker_state_machine.is_active.check: +; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; AMDGPU: worker_state_machine.parallel_region.check: +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__10_wrapper.ID to void (i16, i32)*) +; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; AMDGPU: 
worker_state_machine.parallel_region.execute: +; AMDGPU-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; AMDGPU: worker_state_machine.parallel_region.check1: +; AMDGPU-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; AMDGPU: worker_state_machine.parallel_region.execute2: +; AMDGPU-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU: worker_state_machine.parallel_region.check3: +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU: worker_state_machine.parallel_region.end: +; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU: worker_state_machine.done.barrier: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU: thread.user_code.check: +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; 
AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 +; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10 +; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca i32*, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11 +; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: call void @p1() #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; AMDGPU-NEXT: store i16 [[TMP0]], i16* 
[[DOTADDR]], align 2 +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; AMDGPU: worker_state_machine.begin: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; AMDGPU: worker_state_machine.finished: +; AMDGPU-NEXT: ret void +; AMDGPU: worker_state_machine.is_active.check: +; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; AMDGPU: worker_state_machine.parallel_region.check: +; AMDGPU-NEXT: 
[[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__13_wrapper.ID to void (i16, i32)*) +; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; AMDGPU: worker_state_machine.parallel_region.execute: +; AMDGPU-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; AMDGPU: worker_state_machine.parallel_region.check1: +; AMDGPU-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; AMDGPU: worker_state_machine.parallel_region.execute2: +; AMDGPU-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU: worker_state_machine.parallel_region.check3: +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU: worker_state_machine.parallel_region.end: +; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU: worker_state_machine.done.barrier: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU: thread.user_code.check: +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; AMDGPU-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* 
noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12 +; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind 
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13 +; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14 +; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: call void @p1() #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; 
AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; AMDGPU: worker_state_machine.begin: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label 
[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; AMDGPU: worker_state_machine.finished: +; AMDGPU-NEXT: ret void +; AMDGPU: worker_state_machine.is_active.check: +; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; AMDGPU: worker_state_machine.parallel_region.check: +; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper +; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; AMDGPU: worker_state_machine.parallel_region.execute: +; AMDGPU-NEXT: call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; AMDGPU: worker_state_machine.parallel_region.fallback.execute: +; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; AMDGPU: worker_state_machine.parallel_region.end: +; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU: worker_state_machine.done.barrier: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU: thread.user_code.check: +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull 
readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15 +; AMDGPU-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; AMDGPU: if.then: +; AMDGPU-NEXT: br label [[RETURN:%.*]] +; AMDGPU: if.end: +; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] +; AMDGPU-NEXT: br label [[RETURN]] +; AMDGPU: return: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; 
AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; AMDGPU: if.then: +; AMDGPU-NEXT: br label [[RETURN:%.*]] +; AMDGPU: if.end: +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] +; AMDGPU-NEXT: br label [[RETURN]] +; AMDGPU: return: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; AMDGPU: worker_state_machine.begin: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** 
[[WORKER_WORK_FN_ADDR]], align 8 +; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; AMDGPU: worker_state_machine.finished: +; AMDGPU-NEXT: ret void +; AMDGPU: worker_state_machine.is_active.check: +; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; AMDGPU: worker_state_machine.parallel_region.fallback.execute: +; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; AMDGPU: worker_state_machine.parallel_region.end: +; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; AMDGPU: worker_state_machine.done.barrier: +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; AMDGPU: thread.user_code.check: +; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; AMDGPU: user_code.entry: +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; AMDGPU-NEXT: ret void +; AMDGPU: worker.exit: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: 
convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16 +; AMDGPU-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR8]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@weak_callee_empty +; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17 +; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call 
void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18 +; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19 +; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; AMDGPU-NEXT: call void @p0() #[[ATTR10]] +; AMDGPU-NEXT: ret void +; +; +; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; AMDGPU-NEXT: entry: +; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; AMDGPU-NEXT: 
[[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; AMDGPU-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; AMDGPU-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; AMDGPU-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__ +; ALL-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; ALL-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; ALL-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; ALL: omp_if.then: +; ALL-NEXT: store i32 0, i32* @G, align 4 +; ALL-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; ALL-NEXT: br label [[OMP_IF_END]] +; ALL: omp_if.end: +; ALL-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; ALL-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; ALL-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; ALL: omp_if.then: +; ALL-NEXT: store i32 0, i32* @G, align 4 +; ALL-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; ALL-NEXT: br label [[OMP_IF_END]] +; ALL: omp_if.end: +; ALL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define 
{{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]]) +; 
ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__1 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull 
readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__2 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define 
{{[^@]+}}@__omp_outlined__2_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__3 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; 
ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call 
void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]] +; ALL: worker_state_machine.parallel_region.execute5: +; ALL-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check6: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void 
@__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__4 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] +; ALL-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: 
entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__5 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; 
ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* 
bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__8_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.fallback.execute: +; ALL-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* 
noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__6 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__7 +; ALL-SAME: 
(i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__8 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, 
align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label 
[[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__10_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 
dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__9 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: 
define {{[^@]+}}@__omp_outlined__10 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__11 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 
4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__13_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__12(i32* noalias 
nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__12 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent 
noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__13 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__14 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; 
ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 
[[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.fallback.execute: +; ALL-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function 
Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__15 +; ALL-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; ALL-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; ALL-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; ALL: if.then: +; ALL-NEXT: br label [[RETURN:%.*]] +; ALL: if.end: +; ALL-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] +; ALL-NEXT: br label [[RETURN]] +; ALL: return: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; ALL-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[CMP:%.*]] = icmp 
eq i32 [[TMP0]], 0 +; ALL-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; ALL: if.then: +; ALL-NEXT: br label [[RETURN:%.*]] +; ALL: if.end: +; ALL-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] +; ALL-NEXT: br label [[RETURN]] +; ALL: return: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br 
i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.fallback.execute: +; ALL-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__16 +; ALL-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @weak_callee_empty() #[[ATTR8]] +; ALL-NEXT: ret void +; +; +; 
ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@weak_callee_empty +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__17 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__18 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse 
nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* 
[[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__19 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* 
@[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__ +; ALL-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; ALL-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; ALL-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; ALL: omp_if.then: +; ALL-NEXT: store i32 0, i32* @G, align 4 +; ALL-NEXT: call void 
@__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; ALL-NEXT: br label [[OMP_IF_END]] +; ALL: omp_if.end: +; ALL-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; ALL-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; ALL-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; ALL: omp_if.then: +; ALL-NEXT: store i32 0, i32* @G, align 4 +; ALL-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; ALL-NEXT: br label [[OMP_IF_END]] +; ALL: omp_if.end: +; ALL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: 
[[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void 
@__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__1 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast 
(void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__2 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__3 +; 
ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: 
[[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br i1 true, label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]] +; ALL: worker_state_machine.parallel_region.execute5: +; ALL-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check6: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__4 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 
8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] +; ALL-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__5 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: 
worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__8_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; ALL: worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]]) +; 
ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.fallback.execute: +; ALL-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__6 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: 
[[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__7 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 
4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__8 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; 
ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__10_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: 
worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__9 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x 
i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__10 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, 
i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__11 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca 
i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__13_wrapper.ID to void (i16, i32)*) +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.check1: +; ALL-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; ALL: 
worker_state_machine.parallel_region.execute2: +; ALL-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.check3: +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; ALL-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__12 +; ALL-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 
x i8*], align 8 +; ALL-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; ALL-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__13 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, 
i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__14 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p1() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: 
[[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.check: +; ALL-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__19_wrapper +; ALL-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; ALL: worker_state_machine.parallel_region.execute: +; ALL-NEXT: call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.fallback.execute: +; ALL-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; 
ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__15 +; ALL-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; ALL-SAME: (i32 [[A:%.*]]) 
#[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; ALL-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; ALL: if.then: +; ALL-NEXT: br label [[RETURN:%.*]] +; ALL: if.end: +; ALL-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] +; ALL-NEXT: br label [[RETURN]] +; ALL: return: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; ALL-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; ALL-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; ALL: if.then: +; ALL-NEXT: br label [[RETURN:%.*]] +; ALL: if.end: +; ALL-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; ALL-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] +; ALL-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] +; ALL-NEXT: br label [[RETURN]] +; ALL: return: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; ALL-SAME: () #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: 
[[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; ALL-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; ALL: worker_state_machine.begin: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; ALL-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; ALL-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; ALL-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; ALL-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; ALL: worker_state_machine.finished: +; ALL-NEXT: ret void +; ALL: worker_state_machine.is_active.check: +; ALL-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; ALL: worker_state_machine.parallel_region.fallback.execute: +; ALL-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; ALL: worker_state_machine.parallel_region.end: +; ALL-NEXT: call void @__kmpc_kernel_end_parallel() +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; ALL: worker_state_machine.done.barrier: +; ALL-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; ALL-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; ALL: thread.user_code.check: +; ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; ALL-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; ALL: 
user_code.entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; ALL-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; ALL-NEXT: ret void +; ALL: worker.exit: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__16 +; ALL-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @weak_callee_empty() #[[ATTR8]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@weak_callee_empty +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__17 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = 
alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__18 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define 
{{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline nounwind +; ALL-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; ALL-SAME: () #[[ATTR1]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; ALL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; ALL-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; ALL-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__19 +; ALL-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; ALL-NEXT: call void @p0() #[[ATTR10]] +; ALL-NEXT: ret void +; +; +; ALL: Function Attrs: convergent noinline 
norecurse nounwind +; ALL-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; ALL-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; ALL-NEXT: entry: +; ALL-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; ALL-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; ALL-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; ALL-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; ALL-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; ALL-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; ALL-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; ALL-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; ALL-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; NVPTX-SAME: () #[[ATTR0:[0-9]+]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; NVPTX-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse 
nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ +; NVPTX-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]] +; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; NVPTX-SAME: () #[[ATTR1:[0-9]+]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; NVPTX: omp_if.then: +; NVPTX-NEXT: store i32 0, i32* @G, align 4 +; NVPTX-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: br label [[OMP_IF_END]] +; NVPTX: omp_if.end: +; NVPTX-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; NVPTX-NEXT: br i1 [[TMP2]], 
label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; NVPTX: omp_if.then: +; NVPTX-NEXT: store i32 0, i32* @G, align 4 +; NVPTX-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; NVPTX-NEXT: br label [[OMP_IF_END]] +; NVPTX: omp_if.end: +; NVPTX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; NVPTX: worker_state_machine.begin: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; NVPTX: worker_state_machine.finished: +; NVPTX-NEXT: ret void +; NVPTX: worker_state_machine.is_active.check: +; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; NVPTX: 
worker_state_machine.parallel_region.check: +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*) +; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; NVPTX: worker_state_machine.parallel_region.execute: +; NVPTX-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; NVPTX: worker_state_machine.parallel_region.check1: +; NVPTX-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; NVPTX: worker_state_machine.parallel_region.execute2: +; NVPTX-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX: worker_state_machine.parallel_region.check3: +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX: worker_state_machine.parallel_region.end: +; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX: worker_state_machine.done.barrier: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX: thread.user_code.check: +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) 
[[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 +; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: 
ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 +; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR10:[0-9]+]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 +; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @p1() #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; NVPTX-SAME: (i16 zeroext 
[[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; NVPTX: worker_state_machine.begin: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label 
[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; NVPTX: worker_state_machine.finished: +; NVPTX-NEXT: ret void +; NVPTX: worker_state_machine.is_active.check: +; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; NVPTX: worker_state_machine.parallel_region.check: +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper +; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; NVPTX: worker_state_machine.parallel_region.execute: +; NVPTX-NEXT: call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; NVPTX: worker_state_machine.parallel_region.check1: +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) +; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; NVPTX: worker_state_machine.parallel_region.execute2: +; NVPTX-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX: worker_state_machine.parallel_region.check3: +; NVPTX-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]] +; NVPTX: worker_state_machine.parallel_region.execute5: +; NVPTX-NEXT: call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX: worker_state_machine.parallel_region.check6: +; NVPTX-NEXT: br label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX: worker_state_machine.parallel_region.end: +; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX: worker_state_machine.done.barrier: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX: thread.user_code.check: +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 +; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; NVPTX-NEXT: call void 
@simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] +; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 +; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @p1() #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP1:%.*]] 
= bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; NVPTX: worker_state_machine.begin: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: 
[[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; NVPTX: worker_state_machine.finished: +; NVPTX-NEXT: ret void +; NVPTX: worker_state_machine.is_active.check: +; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; NVPTX: worker_state_machine.parallel_region.check: +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) +; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; NVPTX: worker_state_machine.parallel_region.execute: +; NVPTX-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; NVPTX: worker_state_machine.parallel_region.check1: +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__8_wrapper.ID to void (i16, i32)*) +; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; NVPTX: worker_state_machine.parallel_region.execute2: +; NVPTX-NEXT: call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX: 
worker_state_machine.parallel_region.fallback.execute: +; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX: worker_state_machine.parallel_region.end: +; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX: worker_state_machine.done.barrier: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX: thread.user_code.check: +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 +; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = 
alloca [0 x i8*], align 8 +; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] +; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 +; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; NVPTX-NEXT: store i32 0, i32* 
[[DOTZERO_ADDR]], align 4 +; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 +; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @p1() #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = 
alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; NVPTX: worker_state_machine.begin: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; NVPTX: worker_state_machine.finished: +; NVPTX-NEXT: ret void +; NVPTX: worker_state_machine.is_active.check: +; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; NVPTX: worker_state_machine.parallel_region.check: +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__10_wrapper.ID to void (i16, i32)*) +; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; NVPTX: worker_state_machine.parallel_region.execute: +; NVPTX-NEXT: call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; NVPTX: worker_state_machine.parallel_region.check1: +; NVPTX-NEXT: br i1 true, label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; NVPTX: worker_state_machine.parallel_region.execute2: +; NVPTX-NEXT: call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX: worker_state_machine.parallel_region.check3: +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX: worker_state_machine.parallel_region.end: +; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX: worker_state_machine.done.barrier: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX: thread.user_code.check: +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9 +; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; 
NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10 +; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: 
[[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11 +; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @p1() #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; 
NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; NVPTX: worker_state_machine.begin: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; NVPTX: worker_state_machine.finished: +; NVPTX-NEXT: ret void +; NVPTX: worker_state_machine.is_active.check: +; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; NVPTX: worker_state_machine.parallel_region.check: +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__13_wrapper.ID to void (i16, i32)*) +; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]] +; NVPTX: worker_state_machine.parallel_region.execute: +; 
NVPTX-NEXT: call void @__omp_outlined__13_wrapper(i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; NVPTX: worker_state_machine.parallel_region.check1: +; NVPTX-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]] +; NVPTX: worker_state_machine.parallel_region.execute2: +; NVPTX-NEXT: call void @__omp_outlined__14_wrapper(i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX: worker_state_machine.parallel_region.check3: +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX: worker_state_machine.parallel_region.end: +; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX: worker_state_machine.done.barrier: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX: thread.user_code.check: +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; NVPTX-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12 +; NVPTX-SAME: (i32* noalias 
nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13 +; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; 
NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14 +; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @p1() #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; NVPTX-NEXT: call void 
@__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; NVPTX: worker_state_machine.begin: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; NVPTX: worker_state_machine.finished: +; NVPTX-NEXT: ret void +; NVPTX: worker_state_machine.is_active.check: +; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; NVPTX: worker_state_machine.parallel_region.check: +; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], 
@__omp_outlined__19_wrapper +; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] +; NVPTX: worker_state_machine.parallel_region.execute: +; NVPTX-NEXT: call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; NVPTX: worker_state_machine.parallel_region.fallback.execute: +; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] +; NVPTX: worker_state_machine.parallel_region.end: +; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX: worker_state_machine.done.barrier: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX: thread.user_code.check: +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__15 +; NVPTX-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree 
nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; NVPTX: if.then: +; NVPTX-NEXT: br label [[RETURN:%.*]] +; NVPTX: if.end: +; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] +; NVPTX-NEXT: br label [[RETURN]] +; NVPTX: return: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; NVPTX: if.then: +; NVPTX-NEXT: br label [[RETURN:%.*]] +; NVPTX: if.end: +; 
NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] +; NVPTX-NEXT: br label [[RETURN]] +; NVPTX: return: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) +; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] +; NVPTX: worker_state_machine.begin: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) +; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* +; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null +; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] +; NVPTX: worker_state_machine.finished: +; NVPTX-NEXT: ret void +; NVPTX: worker_state_machine.is_active.check: +; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label 
[[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] +; NVPTX: worker_state_machine.parallel_region.fallback.execute: +; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] +; NVPTX: worker_state_machine.parallel_region.end: +; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] +; NVPTX: worker_state_machine.done.barrier: +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] +; NVPTX: thread.user_code.check: +; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; NVPTX: user_code.entry: +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; NVPTX-NEXT: ret void +; NVPTX: worker.exit: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16 +; NVPTX-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR8]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline 
nounwind +; NVPTX-LABEL: define {{[^@]+}}@weak_callee_empty +; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17 +; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18 +; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: 
convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19 +; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; NVPTX-NEXT: call void @p0() #[[ATTR10]] +; NVPTX-NEXT: ret void +; +; +; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; NVPTX-NEXT: entry: +; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; NVPTX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; NVPTX-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; NVPTX-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: ret void ; ; ; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind @@ -1828,8 +6645,8 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7:[0-9]+]] -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -1895,11 +6712,11 @@ ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]] +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-DISABLED-NEXT: ret void @@ -1911,7 +6728,7 @@ ; 
CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10:[0-9]+]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -1937,7 +6754,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -1984,13 +6801,13 @@ ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]] -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR7]] -; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-DISABLED-NEXT: call void 
@simple_state_machine_interprocedural_after.internalized() #[[ATTR7]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2022,7 +6839,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2095,7 +6912,7 @@ ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR9]] +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-DISABLED-NEXT: ret void @@ -2107,7 +6924,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void 
; ; @@ -2133,7 +6950,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2184,7 +7001,7 @@ ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-DISABLED-NEXT: ret void @@ -2196,7 +7013,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2222,7 +7039,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; 
CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2270,7 +7087,7 @@ ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) @@ -2285,7 +7102,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2311,7 +7128,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2355,8 +7172,8 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* 
@omp_get_thread_num to i32 ()*)() #[[ATTR9]] -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR7]] +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2372,8 +7189,8 @@ ; CHECK-DISABLED-NEXT: br label [[RETURN:%.*]] ; CHECK-DISABLED: if.end: ; CHECK-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR7]] -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR7]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] ; CHECK-DISABLED-NEXT: br label [[RETURN]] ; CHECK-DISABLED: return: ; CHECK-DISABLED-NEXT: ret void @@ -2393,8 +7210,8 @@ ; CHECK-DISABLED: if.end: ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 ; CHECK-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR9]] -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] ; CHECK-DISABLED-NEXT: br label [[RETURN]] ; CHECK-DISABLED: return: ; CHECK-DISABLED-NEXT: ret void @@ -2424,7 
+7241,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR7]] +; CHECK-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR8]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2441,7 +7258,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2467,7 +7284,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2515,7 +7332,2200 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 
[[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) 
#[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; CHECK-DISABLED: omp_if.then: +; CHECK-DISABLED-NEXT: store i32 0, i32* @G, align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: br label [[OMP_IF_END]] +; CHECK-DISABLED: omp_if.end: +; CHECK-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_END:%.*]] +; CHECK-DISABLED: omp_if.then: +; CHECK-DISABLED-NEXT: store i32 0, i32* @G, align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: br label [[OMP_IF_END]] +; CHECK-DISABLED: omp_if.end: +; CHECK-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull 
readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone 
[[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; CHECK-DISABLED-SAME: (i16 
zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call 
void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; 
CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca 
i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: 
convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void 
@__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; 
CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* 
[[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-DISABLED-NEXT: call 
void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define 
{{[^@]+}}@__omp_outlined__11 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], 
-1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 
noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void 
@__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; 
CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define 
{{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; CHECK-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK-DISABLED: if.then: +; CHECK-DISABLED-NEXT: br label [[RETURN:%.*]] +; CHECK-DISABLED: if.end: +; CHECK-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: br label [[RETURN]] +; CHECK-DISABLED: return: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; CHECK-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK-DISABLED: if.then: +; CHECK-DISABLED-NEXT: br label [[RETURN:%.*]] +; CHECK-DISABLED: if.end: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] +; CHECK-DISABLED-NEXT: call void 
@simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] +; CHECK-DISABLED-NEXT: br label [[RETURN]] +; CHECK-DISABLED: return: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR8]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@weak_callee_empty +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; 
CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; 
CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; 
CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; 
CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; CHECK-DISABLED: omp_if.then: +; CHECK-DISABLED-NEXT: store i32 0, i32* @G, align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: br label [[OMP_IF_END]] +; CHECK-DISABLED: omp_if.end: +; CHECK-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; CHECK-DISABLED-SAME: () #[[ATTR1]] { 
+; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; CHECK-DISABLED: omp_if.then: +; CHECK-DISABLED-NEXT: store i32 0, i32* @G, align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: br label [[OMP_IF_END]] +; CHECK-DISABLED: omp_if.end: +; CHECK-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void 
@__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* 
@__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* 
@[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* 
@__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; 
CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call 
void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; 
CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* 
[[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind 
+; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: 
[[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef 
bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; 
CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: 
+; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: call void 
@unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = 
alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* 
[[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 
bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; CHECK-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK-DISABLED: if.then: +; CHECK-DISABLED-NEXT: br label [[RETURN:%.*]] +; CHECK-DISABLED: if.end: +; CHECK-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: br label [[RETURN]] +; CHECK-DISABLED: return: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; CHECK-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK-DISABLED: if.then: +; CHECK-DISABLED-NEXT: br label [[RETURN:%.*]] +; CHECK-DISABLED: if.end: 
+; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] +; CHECK-DISABLED-NEXT: br label [[RETURN]] +; CHECK-DISABLED: return: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone 
align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR8]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@weak_callee_empty +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: 
call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; 
CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void 
@p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) 
[[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; CHECK-DISABLED: omp_if.then: +; CHECK-DISABLED-NEXT: store i32 0, i32* @G, align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: br label [[OMP_IF_END]] +; CHECK-DISABLED: omp_if.end: +; CHECK-DISABLED-NEXT: 
call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +; CHECK-DISABLED: omp_if.then: +; CHECK-DISABLED-NEXT: store i32 0, i32* @G, align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: br label [[OMP_IF_END]] +; CHECK-DISABLED: omp_if.end: +; CHECK-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], 
align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* 
[[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind 
+; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: 
[[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* 
[[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], 
i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: 
[[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef 
bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; 
CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: 
entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load 
i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca 
i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) 
#[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-DISABLED-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; 
CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper 
+; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 
[[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 
dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized +; CHECK-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK-DISABLED: if.then: +; CHECK-DISABLED-NEXT: br label [[RETURN:%.*]] +; CHECK-DISABLED: if.end: +; CHECK-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: br label [[RETURN]] +; CHECK-DISABLED: return: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after +; CHECK-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = 
load i32, i32* [[A_ADDR]], align 4 +; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK-DISABLED: if.then: +; CHECK-DISABLED-NEXT: br label [[RETURN:%.*]] +; CHECK-DISABLED: if.end: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] +; CHECK-DISABLED-NEXT: br label [[RETURN]] +; CHECK-DISABLED: return: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 +; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 true, i1 true) +; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK-DISABLED: user_code.entry: +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) +; CHECK-DISABLED-NEXT: ret 
void +; CHECK-DISABLED: worker.exit: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR8]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@weak_callee_empty +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; 
CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper +; CHECK-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +; CHECK-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +; CHECK-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; 
CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after +; CHECK-DISABLED-SAME: () #[[ATTR1]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) +; CHECK-DISABLED-NEXT: ret void +; +; +; CHECK-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19 +; CHECK-DISABLED-SAME: (i32* noalias nocapture nofree readnone 
[[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-DISABLED-NEXT: entry: +; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -124,9 +124,9 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -135,7 +135,7 @@ ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_l5 ; CHECK-DISABLED-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-DISABLED-NEXT: 
[[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 @@ -144,8 +144,9 @@ ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: worker_state_machine.begin: ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -172,9 +173,9 @@ ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* 
[[DOTTHREADID_TEMP_]], align 4 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: @@ -227,7 +228,7 @@ ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ @@ -255,7 +256,7 @@ ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-DISABLED: for.end: -; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -297,7 +298,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 @@ -305,7 +306,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; 
CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -331,7 +332,7 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper @@ -345,7 +346,7 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -381,9 +382,9 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 
dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -392,7 +393,7 @@ ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 @@ -401,8 +402,9 @@ ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: worker_state_machine.begin: ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label 
[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -429,9 +431,9 @@ ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: @@ -467,7 +469,7 @@ ; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; CHECK-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 ; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* -; CHECK-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR5]] +; CHECK-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] ; CHECK-NEXT: store i32 0, i32* [[I]], align 4 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: @@ -485,7 +487,7 @@ ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-NEXT: call void @spmd_amenable() #[[ATTR6]] 
; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 @@ -498,7 +500,7 @@ ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 ; CHECK-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* -; CHECK-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR5]] +; CHECK-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] ; CHECK-DISABLED-NEXT: store i32 0, i32* [[I]], align 4 ; CHECK-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; CHECK-DISABLED: for.cond: @@ -516,7 +518,7 @@ ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-DISABLED: for.end: -; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -566,7 +568,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 @@ -574,7 +576,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -598,7 +600,7 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; 
CHECK-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper @@ -612,7 +614,7 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -640,9 +642,9 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -651,7 +653,7 @@ ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: -; 
CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 @@ -660,8 +662,9 @@ ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: worker_state_machine.begin: ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -688,9 +691,9 @@ ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: @@ -743,7 +746,7 @@ ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-NEXT: call void @spmd_amenable() #[[ATTR6]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 @@ -773,7 +776,7 @@ ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-DISABLED: for.end: -; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -826,7 +829,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; CHECK-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 @@ -839,7 +842,7 @@ ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; CHECK-DISABLED-NEXT: 
[[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -873,7 +876,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** ; CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -; CHECK-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper @@ -891,7 +894,7 @@ ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** ; CHECK-DISABLED-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -921,9 +924,9 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone 
align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -932,7 +935,7 @@ ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 @@ -941,8 +944,9 @@ ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: worker_state_machine.begin: ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; 
CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -969,9 +973,9 @@ ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: @@ -1039,7 +1043,7 @@ ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-NEXT: call void @spmd_amenable() #[[ATTR6]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 @@ -1071,7 +1075,7 @@ ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-DISABLED: for.end: -; 
CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -1125,7 +1129,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; CHECK-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 @@ -1138,7 +1142,7 @@ ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; CHECK-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -1172,7 +1176,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** ; CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -; CHECK-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper @@ -1190,7 +1194,7 @@ ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** ; CHECK-DISABLED-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -1214,7 +1218,7 @@ ; 
CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_do_not_spmdize_target_l65 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) @@ -1222,8 +1226,9 @@ ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -1244,8 +1249,8 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; CHECK-NEXT: call void @__omp_outlined__8(i32* noalias nocapture 
noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] +; CHECK-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -1254,7 +1259,7 @@ ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_do_not_spmdize_target_l65 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 @@ -1263,8 +1268,9 @@ ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: worker_state_machine.begin: ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; 
CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -1285,8 +1291,8 @@ ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; CHECK-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: @@ -1317,7 +1323,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 @@ -1325,7 +1331,7 @@ ; CHECK-DISABLED-NEXT: entry: ; 
CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -1369,19 +1375,21 @@ ;. ; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; CHECK: attributes #[[ATTR3]] = { nounwind } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nounwind } -; CHECK: attributes #[[ATTR5]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; CHECK: attributes #[[ATTR6]] = { convergent } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { alwaysinline } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; CHECK: attributes #[[ATTR4]] = { nounwind } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nounwind } +; CHECK: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; CHECK: attributes #[[ATTR7]] = { convergent } ;. 
; CHECK-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; CHECK-DISABLED: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; CHECK-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; CHECK-DISABLED: attributes #[[ATTR3]] = { nounwind } -; CHECK-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent nounwind } -; CHECK-DISABLED: attributes #[[ATTR5]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; CHECK-DISABLED: attributes #[[ATTR6]] = { convergent } +; CHECK-DISABLED: attributes #[[ATTR2:[0-9]+]] = { alwaysinline } +; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; CHECK-DISABLED: attributes #[[ATTR4]] = { nounwind } +; CHECK-DISABLED: attributes #[[ATTR5:[0-9]+]] = { convergent nounwind } +; CHECK-DISABLED: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; CHECK-DISABLED: attributes #[[ATTR7]] = { convergent } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_stack_var", i32 20, i32 1} ; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop", i32 5, i32 0}