Patch summary (text before the first "diff --git" header is ignored by patch tools):
  * OpenMPOpt.cpp: state-machine uses are replaced via getPointerBitCastOrAddrSpaceCast instead of getBitCast.
  * OpenMPOpt.cpp: the "worker.work_fn.addr" alloca is created in AddressSpace::Local instead of address space 0, and an
    addrspacecast to AddressSpace::Generic ("<name>.generic") is what gets passed to __kmpc_kernel_parallel and loaded from.
  * custom_state_machines.ll: CHECK lines follow suit (alloca in addrspace(5), new addrspacecast line, attribute numbers shifted by one).
NOTE(review): the template arguments of static_cast<...> and cast<...> were missing in this copy of the patch and have been restored as <OMPInformationCache &> and <PointerType>; confirm against the upstream commit.
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2036,7 +2036,8 @@ UndefValue::get(Int8Ty), F->getName() + ".ID"); for (Use *U : ToBeReplacedStateMachineUses) - U->set(ConstantExpr::getBitCast(ID, U->get()->getType())); + U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast( + ID, U->get()->getType())); ++NumOpenMPParallelRegionsReplacedInGPUStateMachine; @@ -3424,8 +3425,9 @@ // Create local storage for the work function pointer. Type *VoidPtrTy = Type::getInt8PtrTy(Ctx); - AllocaInst *WorkFnAI = new AllocaInst(VoidPtrTy, 0, "worker.work_fn.addr", - &Kernel->getEntryBlock().front()); + AllocaInst *WorkFnAI = + new AllocaInst(VoidPtrTy, (unsigned int)AddressSpace::Local, + "worker.work_fn.addr", &Kernel->getEntryBlock().front()); WorkFnAI->setDebugLoc(DLoc); auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); @@ -3445,14 +3447,23 @@ CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB) ->setDebugLoc(DLoc); + Instruction *WorkFnAIGeneric = new AddrSpaceCastInst( + WorkFnAI, + PointerType::getWithSamePointeeType( + cast<PointerType>(WorkFnAI->getType()), + (unsigned int)AddressSpace::Generic), + WorkFnAI->getName() + ".generic", StateMachineBeginBB); + WorkFnAIGeneric->setDebugLoc(DLoc); + FunctionCallee KernelParallelFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_kernel_parallel); - Instruction *IsActiveWorker = CallInst::Create( - KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB); + Instruction *IsActiveWorker = + CallInst::Create(KernelParallelFn, {WorkFnAIGeneric}, + "worker.is_active", StateMachineBeginBB); IsActiveWorker->setDebugLoc(DLoc); - Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn", - StateMachineBeginBB); + Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAIGeneric, + "worker.work_fn", StateMachineBeginBB); WorkFn->setDebugLoc(DLoc); 
FunctionType *ParallelRegionFnTy = FunctionType::get( diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -865,8 +865,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7:[0-9]+]] -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8:[0-9]+]] +; CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] ; CHECK-NEXT: ret void ; ; @@ -908,7 +908,7 @@ ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) @@ -916,8 +916,9 @@ ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** 
[[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -966,11 +967,11 @@ ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]] +; CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-NEXT: ret void @@ -982,7 +983,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() 
#[[ATTR9:[0-9]+]] +; CHECK-NEXT: call void @p0() #[[ATTR10:[0-9]+]] ; CHECK-NEXT: ret void ; ; @@ -1008,7 +1009,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR9]] +; CHECK-NEXT: call void @p1() #[[ATTR10]] ; CHECK-NEXT: ret void ; ; @@ -1032,7 +1033,7 @@ ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) @@ -1040,8 +1041,9 @@ ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label 
[[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -1095,13 +1097,13 @@ ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]] -; CHECK-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR7]] -; CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] +; CHECK-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR7]] +; CHECK-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; CHECK-NEXT: ret void ; ; @@ -1133,7 +1135,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR9]] +; CHECK-NEXT: call void @p1() #[[ATTR10]] ; CHECK-NEXT: ret void ; ; @@ -1179,7 +1181,7 @@ ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, 
addrspace(5) ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) @@ -1187,8 +1189,9 @@ ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -1242,7 +1245,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR9]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* 
[[CAPTURED_VARS_ADDRS1]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-NEXT: ret void @@ -1254,7 +1257,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR9]] +; CHECK-NEXT: call void @p0() #[[ATTR10]] ; CHECK-NEXT: ret void ; ; @@ -1280,7 +1283,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR9]] +; CHECK-NEXT: call void @p1() #[[ATTR10]] ; CHECK-NEXT: ret void ; ; @@ -1304,7 +1307,7 @@ ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) @@ -1312,8 +1315,9 @@ ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = 
addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -1365,7 +1369,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-NEXT: ret void @@ -1377,7 +1381,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR9]] +; CHECK-NEXT: call void @p0() #[[ATTR10]] ; CHECK-NEXT: ret void ; ; @@ -1403,7 +1407,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR9]] +; CHECK-NEXT: call void @p1() #[[ATTR10]] ; CHECK-NEXT: ret void ; ; @@ -1427,7 +1431,7 @@ ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) @@ -1435,8 +1439,9 @@ ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -1485,7 +1490,7 @@ ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR8]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) @@ -1500,7 +1505,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR9]] +; CHECK-NEXT: call void @p0() #[[ATTR10]] ; CHECK-NEXT: ret void ; ; @@ -1526,7 +1531,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p1() #[[ATTR9]] +; CHECK-NEXT: call void @p1() #[[ATTR10]] ; CHECK-NEXT: ret void ; ; @@ -1550,7 +1555,7 @@ ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) @@ -1558,8 +1563,9 @@ ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 
[[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -1600,8 +1606,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR9]] -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR7]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] ; CHECK-NEXT: ret void ; ; @@ -1617,8 +1623,8 @@ ; CHECK-NEXT: br label [[RETURN:%.*]] ; CHECK: if.end: ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR7]] -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR7]] +; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] +; CHECK-NEXT: 
call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: ret void @@ -1638,8 +1644,8 @@ ; CHECK: if.end: ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR9]] -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR9]] +; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] +; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: ret void @@ -1649,7 +1655,7 @@ ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) @@ -1657,8 +1663,9 @@ ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-NEXT: 
[[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -1693,7 +1700,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @weak_callee_empty() #[[ATTR7]] +; CHECK-NEXT: call void @weak_callee_empty() #[[ATTR8]] ; CHECK-NEXT: ret void ; ; @@ -1710,7 +1717,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR9]] +; CHECK-NEXT: call void @p0() #[[ATTR10]] ; CHECK-NEXT: ret void ; ; @@ -1736,7 +1743,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR9]] +; CHECK-NEXT: call void @p0() #[[ATTR10]] ; CHECK-NEXT: ret void ; ; @@ -1784,7 +1791,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @p0() #[[ATTR9]] +; CHECK-NEXT: call void @p0() #[[ATTR10]] ; CHECK-NEXT: ret void ; ; @@ -1828,8 +1835,8 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7:[0-9]+]] -; CHECK-DISABLED-NEXT: call void 
@unknown_no_openmp() #[[ATTR8:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -1895,11 +1902,11 @@ ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]] +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-DISABLED-NEXT: ret void @@ -1911,7 +1918,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; 
CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10:[0-9]+]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -1937,7 +1944,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -1984,13 +1991,13 @@ ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]] -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR7]] -; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR7]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR7]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; 
CHECK-DISABLED-NEXT: ret void ; ; @@ -2022,7 +2029,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2095,7 +2102,7 @@ ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR9]] +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-DISABLED-NEXT: ret void @@ -2107,7 +2114,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2133,7 +2140,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2184,7 +2191,7 @@ ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; CHECK-DISABLED-NEXT: ret void @@ -2196,7 +2203,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2222,7 +2229,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2270,7 +2277,7 @@ 
; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) @@ -2285,7 +2292,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2311,7 +2318,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p1() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2355,8 +2362,8 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR9]] -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) 
#[[ATTR7]] +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @omp_get_thread_num to i32 ()*)() #[[ATTR10]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2372,8 +2379,8 @@ ; CHECK-DISABLED-NEXT: br label [[RETURN:%.*]] ; CHECK-DISABLED: if.end: ; CHECK-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR7]] -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR7]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] ; CHECK-DISABLED-NEXT: br label [[RETURN]] ; CHECK-DISABLED: return: ; CHECK-DISABLED-NEXT: ret void @@ -2393,8 +2400,8 @@ ; CHECK-DISABLED: if.end: ; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 ; CHECK-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR9]] -; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] +; CHECK-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] ; CHECK-DISABLED-NEXT: br label [[RETURN]] ; CHECK-DISABLED: return: ; CHECK-DISABLED-NEXT: ret void @@ -2424,7 +2431,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR7]] +; CHECK-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR8]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2441,7 +2448,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2467,7 +2474,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; @@ -2515,7 +2522,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR9]] +; CHECK-DISABLED-NEXT: call void @p0() #[[ATTR10]] ; CHECK-DISABLED-NEXT: ret void ; ; diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -124,9 +124,9 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 
4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -135,7 +135,7 @@ ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_l5 ; CHECK-DISABLED-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 @@ -144,8 +144,9 @@ ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: worker_state_machine.begin: ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] 
= bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -172,9 +173,9 @@ ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: @@ -227,7 +228,7 @@ ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ @@ -255,7 +256,7 @@ ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-DISABLED-NEXT: br label 
[[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-DISABLED: for.end: -; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -297,7 +298,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 @@ -305,7 +306,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -331,7 +332,7 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper @@ -345,7 +346,7 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -381,9 +382,9 @@ ; 
CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -392,7 +393,7 @@ ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 @@ -401,8 +402,9 @@ ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: worker_state_machine.begin: ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-DISABLED-NEXT: 
[[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -429,9 +431,9 @@ ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* 
@[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: @@ -467,7 +469,7 @@ ; CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; CHECK-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 ; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* -; CHECK-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR5]] +; CHECK-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] ; CHECK-NEXT: store i32 0, i32* [[I]], align 4 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: @@ -485,7 +487,7 @@ ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-NEXT: call void @spmd_amenable() #[[ATTR6]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 @@ -498,7 +500,7 @@ ; CHECK-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 ; CHECK-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* -; CHECK-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR5]] +; CHECK-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] ; CHECK-DISABLED-NEXT: store i32 0, i32* [[I]], align 4 ; CHECK-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; CHECK-DISABLED: for.cond: @@ -516,7 +518,7 @@ ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-DISABLED: for.end: -; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -566,7 +568,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call 
void @unknown() #[[ATTR6]] +; CHECK-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 @@ -574,7 +576,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -598,7 +600,7 @@ ; CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper @@ -612,7 +614,7 @@ ; CHECK-DISABLED-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 ; CHECK-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -640,9 +642,9 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; 
CHECK-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -651,7 +653,7 @@ ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 @@ -660,8 +662,9 @@ ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: worker_state_machine.begin: ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load 
i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -688,9 +691,9 @@ ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: @@ -743,7 +746,7 @@ ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-NEXT: call void @spmd_amenable() #[[ATTR6]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 @@ -773,7 +776,7 @@ ; CHECK-DISABLED-NEXT: 
store i32 [[INC]], i32* [[I]], align 4 ; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-DISABLED: for.end: -; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -826,7 +829,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; CHECK-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 @@ -839,7 +842,7 @@ ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; CHECK-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -873,7 +876,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** ; CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -; CHECK-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper @@ -891,7 +894,7 @@ ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** ; CHECK-DISABLED-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__5(i32* 
[[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -921,9 +924,9 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 false) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -932,7 +935,7 @@ ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guarded_l50 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 @@ -941,8 +944,9 @@ ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: worker_state_machine.begin: ; 
CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -969,9 +973,9 @@ ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone 
align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: @@ -1039,7 +1043,7 @@ ; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-NEXT: call void @spmd_amenable() #[[ATTR6]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 @@ -1071,7 +1075,7 @@ ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[I]], align 4 ; CHECK-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-DISABLED: for.end: -; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; CHECK-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -1125,7 +1129,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; CHECK-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 @@ -1138,7 +1142,7 @@ ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4 ; CHECK-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; CHECK-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4 -; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -1172,7 +1176,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** ; CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -; CHECK-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +; 
CHECK-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper @@ -1190,7 +1194,7 @@ ; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 ; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** ; CHECK-DISABLED-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -; CHECK-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -1214,7 +1218,7 @@ ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_do_not_spmdize_target_l65 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 false, i1 false, i1 true) @@ -1222,8 +1226,9 @@ ; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK: worker_state_machine.begin: ; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-NEXT: 
[[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -1244,8 +1249,8 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; CHECK-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] +; CHECK-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -1254,7 +1259,7 @@ ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a34ca11_do_not_spmdize_target_l65 ; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; CHECK-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 ; CHECK-DISABLED-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 @@ 
-1263,8 +1268,9 @@ ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: worker_state_machine.begin: ; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) -; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** +; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) +; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* ; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null ; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] @@ -1285,8 +1291,8 @@ ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLED: user_code.entry: -; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; CHECK-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] +; CHECK-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture 
noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR4]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: @@ -1317,7 +1323,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 @@ -1325,7 +1331,7 @@ ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; CHECK-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -1369,19 +1375,21 @@ ;. 
; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; CHECK: attributes #[[ATTR3]] = { nounwind } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nounwind } -; CHECK: attributes #[[ATTR5]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; CHECK: attributes #[[ATTR6]] = { convergent } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { alwaysinline } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; CHECK: attributes #[[ATTR4]] = { nounwind } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nounwind } +; CHECK: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; CHECK: attributes #[[ATTR7]] = { convergent } ;. 
; CHECK-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } ; CHECK-DISABLED: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; CHECK-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } -; CHECK-DISABLED: attributes #[[ATTR3]] = { nounwind } -; CHECK-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent nounwind } -; CHECK-DISABLED: attributes #[[ATTR5]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; CHECK-DISABLED: attributes #[[ATTR6]] = { convergent } +; CHECK-DISABLED: attributes #[[ATTR2:[0-9]+]] = { alwaysinline } +; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +; CHECK-DISABLED: attributes #[[ATTR4]] = { nounwind } +; CHECK-DISABLED: attributes #[[ATTR5:[0-9]+]] = { convergent nounwind } +; CHECK-DISABLED: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; CHECK-DISABLED: attributes #[[ATTR7]] = { convergent } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop_to_stack_var", i32 20, i32 1} ; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171231761, !"sequential_loop", i32 5, i32 0}