Index: openmp/runtime/CMakeLists.txt
===================================================================
--- openmp/runtime/CMakeLists.txt
+++ openmp/runtime/CMakeLists.txt
@@ -288,6 +288,9 @@
   set (LIBOMP_USE_VERSION_SYMBOLS FALSE)
 endif()
 
+# Unshackled task support defaults to OFF
+set(LIBOMP_USE_UNSHACKLED_TASK FALSE CACHE BOOL "Use unshackled task?")
+
 # OMPT-support defaults to ON for OpenMP 5.0+ and if the requirements in
 # cmake/config-ix.cmake are fulfilled.
 set(OMPT_DEFAULT FALSE)
Index: openmp/runtime/src/kmp.h
===================================================================
--- openmp/runtime/src/kmp.h
+++ openmp/runtime/src/kmp.h
@@ -2235,7 +2235,14 @@
   unsigned priority_specified : 1; /* set if the compiler provides priority
                                       setting for the task */
   unsigned detachable : 1; /* 1 == can detach */
-  unsigned reserved : 9; /* reserved for compiler use */
+#if USE_UNSHACKLED_TASK
+  // 1 == unshackled task
+  // Although this is marked as a compiler flag, the compiler does not set it
+  // yet. All tasks created via the target task related interfaces set this
+  // flag. It may be used by other features in the future.
+  unsigned unshackled : 1;
+#endif
+  unsigned reserved : 8; /* reserved for compiler use */
 
   /* Library flags */ /* Total library flags must be 16 bits */
   unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
@@ -2283,6 +2290,15 @@
   kmp_depnode_t
       *td_depnode; // Pointer to graph node if this task has dependencies
   kmp_task_team_t *td_task_team;
+#if USE_UNSHACKLED_TASK
+  // The task team of the parent task. Usually we could access it via
+  // parent_task->td_task_team, but it is possible that
+  // parent_task->td_task_team is nullptr because of late initialization.
+  // Sometimes we must use this pointer, and the td_task_team of the
+  // encountering thread is never nullptr, so we record it here when the task
+  // is created.
+  kmp_task_team_t *td_parent_task_team;
+#endif
   kmp_int32 td_size_alloc; // The size of task structure, including shareds etc.
 #if defined(KMP_GOMP_COMPAT)
   // 4 or 8 byte integers for the loop bounds in GOMP_taskloop
@@ -2354,6 +2370,11 @@
   KMP_ALIGN_CACHE
   std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */
 
+#if USE_UNSHACKLED_TASK
+  KMP_ALIGN_CACHE
+  std::atomic<kmp_int32> tt_unfinished_unshackled_tasks;
+#endif
+
   KMP_ALIGN_CACHE
   volatile kmp_uint32
       tt_active; /* is the team still actively executing tasks */
@@ -2818,6 +2839,10 @@
 extern volatile int __kmp_init_monitor;
 #endif
 extern volatile int __kmp_init_user_locks;
+#if USE_UNSHACKLED_TASK
+// Set to TRUE when the unshackled team is being initialized
+extern volatile int __kmp_init_unshackled_threads;
+#endif
 extern int __kmp_init_counter;
 extern int __kmp_root_counter;
 extern int __kmp_version;
@@ -3048,7 +3073,13 @@
 static inline bool KMP_UBER_GTID(int gtid) {
   KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);
+#if USE_UNSHACKLED_TASK
+  KMP_DEBUG_ASSERT(gtid < (__kmp_init_unshackled_threads
+                               ? 2 * __kmp_threads_capacity
+                               : __kmp_threads_capacity));
+#else
   KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);
+#endif
   return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&
           __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);
 }
@@ -3906,6 +3937,30 @@
 extern void __kmp_omp_display_env(int verbose);
 
+#if USE_UNSHACKLED_TASK
+// Master thread of the unshackled team
+extern kmp_info_t *__kmp_unshackled_master_thread;
+// Descriptors for the unshackled threads
+extern kmp_info_t **__kmp_unshackled_threads;
+extern int __kmp_unshackled_threads_num;
+
+extern void __kmp_unshackled_threads_initz_routine();
+extern void __kmp_initialize_unshackled_threads();
+extern void __kmp_do_initialize_unshackled_threads();
+extern void __kmp_unshackled_threads_initz_wait();
+extern void __kmp_unshackled_initz_release();
+extern void __kmp_unshackled_master_thread_wait();
+extern void __kmp_unshackled_worker_thread_wait();
+extern void __kmp_unshackled_worker_thread_signal();
+
+// Check whether a given thread is an unshackled thread
+#define KMP_UNSHACKLED_THREAD(gtid) ((gtid) >= __kmp_threads_capacity)
+// Map a gtid to an unshackled thread. The first unshackled thread, i.e. the
+// master thread, is skipped.
+#define KMP_GTID_TO_SHADOW_GTID(gtid)                                         \
+  ((gtid) % (__kmp_unshackled_threads_num - 1) + 1)
+#endif
+
 #ifdef __cplusplus
 }
 #endif
Index: openmp/runtime/src/kmp_config.h.cmake
===================================================================
--- openmp/runtime/src/kmp_config.h.cmake
+++ openmp/runtime/src/kmp_config.h.cmake
@@ -44,6 +44,8 @@
 #define OMPT_DEBUG LIBOMP_OMPT_DEBUG
 #cmakedefine01 LIBOMP_OMPT_SUPPORT
 #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT
+#cmakedefine01 LIBOMP_USE_UNSHACKLED_TASK
+#define USE_UNSHACKLED_TASK LIBOMP_USE_UNSHACKLED_TASK
 #cmakedefine01 LIBOMP_OMPT_OPTIONAL
 #define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL
 #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS
Index: openmp/runtime/src/kmp_global.cpp
===================================================================
--- openmp/runtime/src/kmp_global.cpp
+++ openmp/runtime/src/kmp_global.cpp
@@ -51,6 +51,9 @@
     0; /* 1 - launched, 2 - actually started (Windows* OS only) */
 #endif
 volatile int __kmp_init_user_locks = FALSE;
+#if USE_UNSHACKLED_TASK
+volatile int __kmp_init_unshackled_threads = FALSE;
+#endif
 
 /* list of address of allocated caches for commons */
 kmp_cached_addr_t *__kmp_threadpriv_cache_list = NULL;
Index: openmp/runtime/src/kmp_runtime.cpp
===================================================================
--- openmp/runtime/src/kmp_runtime.cpp
+++ openmp/runtime/src/kmp_runtime.cpp
@@ -3611,6 +3611,12 @@
      serial initialization may be not a real initial thread). */
   capacity = __kmp_threads_capacity;
+#if USE_UNSHACKLED_TASK
+  // The capacity doubles if unshackled tasks are enabled and we are
+  // initializing the unshackled team
+  if (__kmp_init_unshackled_threads)
+    capacity *= 2;
+#endif
   if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
     --capacity;
   }
@@ -3627,15 +3633,30 @@
     }
   }
 
-  /* find an available thread slot */
-  /* Don't reassign the zero slot since we need that to only be used by initial
-     thread */
-  for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
-       gtid++)
-    ;
-  KA_TRACE(1,
-           ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
-  KMP_ASSERT(gtid < __kmp_threads_capacity);
+#if USE_UNSHACKLED_TASK
+  if (!__kmp_init_unshackled_threads) {
+#endif
+    /* find an available thread slot */
+    /* Don't reassign the zero slot since we need that to only be used by
+       initial thread */
+    for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
+         gtid++)
+      ;
+    KA_TRACE(
+        1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
+    KMP_ASSERT(gtid < __kmp_threads_capacity);
+#if USE_UNSHACKLED_TASK
+  } else {
+    // When initializing the unshackled team, we find the first empty slot in
+    // the second half of __kmp_threads
+    for (gtid = __kmp_threads_capacity; TCR_PTR(__kmp_threads[gtid]) != NULL;
+         gtid++)
+      ;
+    KA_TRACE(
+        1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
+    KMP_ASSERT(gtid < 2 * __kmp_threads_capacity);
+  }
+#endif
 
   /* update global accounting */
   __kmp_all_nth++;
@@ -4292,9 +4313,23 @@
 #endif
   KMP_MB();
-  for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
-    KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
+
+#if USE_UNSHACKLED_TASK
+  // If we're initializing the unshackled threads, the search starts at the end
+  // of the regular threads array, i.e. the start of the unshackled threads array
+  if (__kmp_init_unshackled_threads) {
+    for (new_gtid = __kmp_threads_capacity;
+         TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
+      KMP_DEBUG_ASSERT(new_gtid < 2 * __kmp_threads_capacity);
+    }
+  } else {
+#endif
+    for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
+      KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
+    }
+#if USE_UNSHACKLED_TASK
   }
+#endif
 
   /* allocate space for it. */
   new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
@@ -6674,12 +6709,18 @@
   /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are
    * expandable */
   /* Since allocation is cache-aligned, just add extra padding at the end */
-  size =
-      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
-      CACHE_LINE;
+  size = (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity
+#if USE_UNSHACKLED_TASK
+             * 2
+#endif
+         + CACHE_LINE;
   __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
   __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
-                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
+                               sizeof(kmp_info_t *) * __kmp_threads_capacity
+#if USE_UNSHACKLED_TASK
+                                   * 2
+#endif
+  );
 
   /* init thread counts */
   KMP_DEBUG_ASSERT(__kmp_all_nth ==
@@ -6951,6 +6992,10 @@
 
   KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
   __kmp_release_bootstrap_lock(&__kmp_initz_lock);
+
+#if USE_UNSHACKLED_TASK
+  __kmp_initialize_unshackled_threads();
+#endif
 }
 
 /* ------------------------------------------------------------------------ */
@@ -8297,7 +8342,6 @@
   }
 }
-
 void __kmp_omp_display_env(int verbose) {
   __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
   if (__kmp_init_serial == 0)
@@ -8305,3 +8349,48 @@
   __kmp_display_env_impl(!verbose, verbose);
   __kmp_release_bootstrap_lock(&__kmp_initz_lock);
 }
+
+#if USE_UNSHACKLED_TASK
+kmp_info_t **__kmp_unshackled_threads;
+kmp_info_t *__kmp_unshackled_master_thread;
+int __kmp_unshackled_threads_num;
+
+namespace {
+void __kmp_unshackled_wrapper_fn(int *gtid, int *, ...) {
+  // If this is the master thread, wait for the signal
+  if (__kmpc_master(nullptr, *gtid)) {
+    // First, unset the initial state and release the initial thread
+    __kmp_init_unshackled_threads = FALSE;
+    __kmp_unshackled_initz_release();
+    __kmp_unshackled_master_thread_wait();
+  }
+}
+} // namespace
+
+void __kmp_unshackled_threads_initz_routine() {
+  kmp_info_t *master_thread = nullptr;
+
+  // Create a new root for the unshackled team/threads
+  const int gtid = __kmp_register_root(TRUE);
+  __kmp_unshackled_master_thread = master_thread = __kmp_threads[gtid];
+  __kmp_unshackled_threads = &__kmp_threads[gtid];
+
+  // TODO: Determine how many unshackled threads to create
+  __kmp_unshackled_threads_num = 8;
+  master_thread->th.th_set_nproc = __kmp_unshackled_threads_num;
+
+  __kmpc_fork_call(nullptr, 0, __kmp_unshackled_wrapper_fn);
+}
+
+void __kmp_initialize_unshackled_threads() {
+  // Set the global variable indicating that we're initializing the unshackled
+  // team/threads
+  __kmp_init_unshackled_threads = TRUE;
+
+  __kmp_do_initialize_unshackled_threads();
+
+  // Wait here for the initialization of the unshackled team to finish
+  __kmp_unshackled_threads_initz_wait();
+}
+
+#endif
Index: openmp/runtime/src/kmp_tasking.cpp
===================================================================
--- openmp/runtime/src/kmp_tasking.cpp
+++ openmp/runtime/src/kmp_tasking.cpp
@@ -325,6 +325,16 @@
 static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+
+#if USE_UNSHACKLED_TASK
+  // If the task is unshackled, push it into the deque of the corresponding
+  // unshackled thread
+  if (taskdata->td_flags.unshackled) {
+    thread = __kmp_unshackled_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
+    gtid = thread->th.th_info.ds.ds_gtid;
+  }
+#endif
+
   kmp_task_team_t *task_team = thread->th.th_task_team;
   kmp_int32 tid = __kmp_tid_from_gtid(gtid);
   kmp_thread_data_t *thread_data;
@@ -363,7 +373,8 @@
   // Find tasking deque specific to encountering thread
   thread_data = &task_team->tt.tt_threads_data[tid];
 
-  // No lock needed since only owner can allocate
+  // No lock needed even if the task is unshackled, because the deque for the
+  // unshackled thread's data has already been initialized
   if (thread_data->td.td_deque == NULL) {
     __kmp_alloc_task_deque(thread, thread_data);
   }
@@ -428,6 +439,12 @@
 
   __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
 
+#if USE_UNSHACKLED_TASK
+  // Signal one worker thread to execute the task
+  if (taskdata->td_flags.unshackled)
+    __kmp_unshackled_worker_thread_signal();
+#endif
+
   return TASK_SUCCESSFULLY_PUSHED;
 }
@@ -720,7 +737,6 @@
 #else /* ! USE_FAST_MEMORY */
   __kmp_thread_free(thread, taskdata);
 #endif
-
   KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
 }
@@ -930,7 +946,7 @@
 #endif
 
   // Only need to keep track of count if team parallel and tasking not
-  // serialized, or task is detachable and event has already been fulfilled
+  // serialized, or task is detachable and event has already been fulfilled
   if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
       taskdata->td_flags.detachable == TASK_DETACHABLE) {
     // Predecrement simulated by "- 1" calculation
@@ -939,6 +955,10 @@
     KMP_DEBUG_ASSERT(children >= 0);
     if (taskdata->td_taskgroup)
      KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
+#if USE_UNSHACKLED_TASK
+    if (taskdata->td_flags.unshackled && taskdata->td_parent_task_team)
+      KMP_ATOMIC_DEC(&taskdata->td_parent_task_team->tt.tt_unfinished_unshackled_tasks);
+#endif
     __kmp_release_deps(gtid, taskdata);
   } else if (task_team && task_team->tt.tt_found_proxy_tasks) {
     // if we found proxy tasks there could exist a dependency chain
@@ -1180,6 +1200,9 @@
   kmp_task_t *task;
   kmp_taskdata_t *taskdata;
   kmp_info_t *thread = __kmp_threads[gtid];
+#if USE_UNSHACKLED_TASK
+  kmp_info_t *encountering_thread = thread;
+#endif
   kmp_team_t *team = thread->th.th_team;
   kmp_taskdata_t *parent_task = thread->th.th_current_task;
   size_t shareds_offset;
@@ -1187,6 +1210,24 @@
   if (!TCR_4(__kmp_init_middle))
     __kmp_middle_initialize();
 
+#if USE_UNSHACKLED_TASK
+  if (flags->unshackled) {
+    // Since unshackled threads are allocated via __kmpc_fork_call, we need to
+    // initialize the parallel runtime accordingly
+    if (!TCR_4(__kmp_init_parallel))
+      __kmp_parallel_initialize();
+
+    // An unshackled task encountered by a regular thread is pushed to the
+    // unshackled thread selected by KMP_GTID_TO_SHADOW_GTID(gtid)
+    if (!KMP_UNSHACKLED_THREAD(gtid)) {
+      thread = __kmp_unshackled_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
+      team = thread->th.th_team;
+      // We don't change the parent-child relation for an unshackled task,
+      // because we need it for per-task-region synchronization
+    }
+  }
+#endif
+
   KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
@@ -1197,6 +1238,13 @@
     }
     flags->final = 1;
   }
+
+#if USE_UNSHACKLED_TASK
+  // An unshackled task is never final
+  if (flags->unshackled)
+    flags->final = 0;
+#endif
+
   if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
     // Untied task encountered causes the TSC algorithm to check entire deque of
     // the victim thread. If no untied task encountered, then checking the head
@@ -1259,11 +1307,23 @@
   // Avoid double allocation here by combining shareds with taskdata
 #if USE_FAST_MEMORY
+#if USE_UNSHACKLED_TASK
+  // To avoid a race condition without using a lock here, we allocate the
+  // memory from the encountering thread
+  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(
+      encountering_thread, shareds_offset + sizeof_shareds);
+#else
   taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
                                                                sizeof_shareds);
+#endif
 #else /* ! USE_FAST_MEMORY */
+#if USE_UNSHACKLED_TASK
+  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(
+      encountering_thread, shareds_offset + sizeof_shareds);
+#else
   taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
                                                                sizeof_shareds);
+#endif
 #endif /* USE_FAST_MEMORY */
   ANNOTATE_HAPPENS_AFTER(taskdata);
@@ -1310,6 +1370,10 @@
   taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
   taskdata->td_flags.proxy = flags->proxy;
   taskdata->td_flags.detachable = flags->detachable;
+#if USE_UNSHACKLED_TASK
+  taskdata->td_flags.unshackled = flags->unshackled;
+  taskdata->td_parent_task_team = encountering_thread->th.th_task_team;
+#endif
   taskdata->td_task_team = thread->th.th_task_team;
   taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
   taskdata->td_flags.tasktype = TASK_EXPLICIT;
@@ -1365,6 +1429,11 @@
     if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
       KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
     }
+#if USE_UNSHACKLED_TASK
+    if (flags->unshackled && taskdata->td_parent_task_team)
+      KMP_ATOMIC_INC(
+          &taskdata->td_parent_task_team->tt.tt_unfinished_unshackled_tasks);
+#endif
   }
 
   KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
@@ -1405,6 +1474,12 @@
                                           size_t sizeof_shareds,
                                           kmp_routine_entry_t task_entry,
                                           kmp_int64 device_id) {
+#if USE_UNSHACKLED_TASK
+  // All tasks allocated via this API should be unshackled and untied
+  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
+  input_flags->unshackled = TRUE;
+  input_flags->tiedness = FALSE;
+#endif
   return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
                                sizeof_shareds, task_entry);
 }
@@ -1870,6 +1945,13 @@
     must_wait = must_wait || (thread->th.th_task_team != NULL &&
                               thread->th.th_task_team->tt.tt_found_proxy_tasks);
+
+#if USE_UNSHACKLED_TASK
+    // If unshackled threads are enabled, we must always wait because there
+    // might be tasks outside of any parallel region
+    must_wait = true;
+#endif
+
     if (must_wait) {
       kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *,
                              &(taskdata->td_incomplete_child_tasks)),
@@ -2827,7 +2909,13 @@
   thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
 
   threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
+#if USE_UNSHACKLED_TASK
+  // This can happen when unshackled tasks are enabled
+  if (threads_data == nullptr)
+    return FALSE;
+#else
   KMP_DEBUG_ASSERT(threads_data != NULL);
+#endif
 
   nthreads = task_team->tt.tt_nproc;
   unfinished_threads = &(task_team->tt.tt_unfinished_threads);
@@ -2911,8 +2999,8 @@
       }
     }
 
-    if (task == NULL) // break out of tasking loop
-      break;
+    if (task == NULL)
+      break; // break out of tasking loop
 
     // Found a task; execute it
 #if USE_ITT_BUILD && USE_ITT_NOTIFY
@@ -3357,6 +3445,9 @@
   task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
 
   KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
+#if USE_UNSHACKLED_TASK
+  KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_unshackled_tasks, 0);
+#endif
   TCW_4(task_team->tt.tt_active, TRUE);
 
   KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
@@ -3508,6 +3599,22 @@
                     __kmp_gtid_from_thread(this_thr),
                     team->t.t_task_team[other_team],
                     ((team != NULL) ? team->t.t_id : -1), other_team));
+#if USE_UNSHACKLED_TASK
+      // For a regular thread, tasking is enabled when the first task is about
+      // to be pushed to a deque. For the unshackled master thread, however, we
+      // enable tasking ahead of time so that later operations can be performed
+      // without taking a lock, avoiding a race condition.
+      kmp_task_team_t *task_team = team->t.t_task_team[other_team];
+      if (this_thr == __kmp_unshackled_master_thread &&
+          !KMP_TASKING_ENABLED(task_team)) {
+        __kmp_enable_tasking(task_team, this_thr);
+        for (int i = 0; i < task_team->tt.tt_nproc; ++i) {
+          kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[i];
+          if (!thread_data->td.td_deque)
+            __kmp_alloc_task_deque(__kmp_unshackled_threads[i], thread_data);
+        }
+      }
+#endif
     } else { // Leave the old task team struct in place for the upcoming region;
       // adjust as needed
       kmp_task_team_t *task_team = team->t.t_task_team[other_team];
@@ -3595,6 +3702,14 @@
     TCW_PTR(this_thr->th.th_task_team, NULL);
   }
+
+#if USE_UNSHACKLED_TASK
+  // We still need to wait here if there are any unfinished unshackled tasks.
+  // A simple spin loop here should not hurt.
+  if (task_team)
+    while (KMP_ATOMIC_LD_ACQ(&task_team->tt.tt_unfinished_unshackled_tasks))
+      ;
+#endif
 }
 
 // __kmp_tasking_barrier:
Index: openmp/runtime/src/kmp_wait_release.h
===================================================================
--- openmp/runtime/src/kmp_wait_release.h
+++ openmp/runtime/src/kmp_wait_release.h
@@ -381,6 +381,23 @@
       break;
     }
 
+#if USE_UNSHACKLED_TASK
+    // For an unshackled thread, if task_team is nullptr, it means the master
+    // thread has not released the barrier. We cannot wait here because once
+    // the master thread releases all child barriers, all unshackled threads
+    // are still sleeping. This leads to a problem that the following
+    // configuration, such as task team sync, will not be performed, so this
+    // thread does not have a task team. Usually that is not a problem.
+    // However, in the corner case where the first task encountered is an
+    // untied task, the check in __kmp_task_alloc will crash because it uses
+    // the task team pointer without checking whether it is nullptr,
+    // presumably assuming the task team has already been set up by then.
+    if (task_team && KMP_UNSHACKLED_THREAD(th_gtid)) {
+      __kmp_unshackled_worker_thread_wait();
+      continue;
+    }
+#endif
+
     // Don't suspend if KMP_BLOCKTIME is set to "infinite"
     if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
         __kmp_pause_status != kmp_soft_paused)
Index: openmp/runtime/src/z_Linux_util.cpp
===================================================================
--- openmp/runtime/src/z_Linux_util.cpp
+++ openmp/runtime/src/z_Linux_util.cpp
@@ -25,6 +25,7 @@
 #include
 #endif
 #include <math.h> // HUGE_VAL.
+#include <semaphore.h>
 #include
 #include
 #include
@@ -2439,7 +2440,7 @@
                            ,
                            void **exit_frame_ptr
 #endif
-                           ) {
+) {
 #if OMPT_SUPPORT
   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
 #endif
@@ -2518,4 +2519,82 @@
 
 #endif
 
+#if USE_UNSHACKLED_TASK
+
+namespace {
+pthread_t __kmp_unshackled_master_thread_handle;
+
+// Condition variable for initializing the unshackled team
+pthread_cond_t __kmp_unshackled_threads_initz_cond_var;
+pthread_mutex_t __kmp_unshackled_threads_initz_lock;
+
+// Condition variable for the wrapper function of the master thread
+pthread_cond_t __kmp_unshackled_master_thread_cond_var;
+pthread_mutex_t __kmp_unshackled_master_thread_lock;
+
+sem_t __kmp_unshackled_task_sem;
+} // namespace
+
+void __kmp_unshackled_worker_thread_wait() {
+  if (sem_wait(&__kmp_unshackled_task_sem))
+    __kmp_fatal(KMP_MSG(CantRegisterNewThread));
+}
+
+void __kmp_do_initialize_unshackled_threads() {
+  // Initialize the condition variables
+  if (pthread_cond_init(&__kmp_unshackled_threads_initz_cond_var, nullptr))
+    __kmp_fatal(KMP_MSG(CantRegisterNewThread));
+  if (pthread_cond_init(&__kmp_unshackled_master_thread_cond_var, nullptr))
+    __kmp_fatal(KMP_MSG(CantRegisterNewThread));
+
+  if (sem_init(&__kmp_unshackled_task_sem, 0, 0))
+    __kmp_fatal(KMP_MSG(CantRegisterNewThread));
+
+  // Create a new thread to finish the initialization
+  if (pthread_create(
+          &__kmp_unshackled_master_thread_handle, nullptr,
+          [](void *) -> void * {
+            __kmp_unshackled_threads_initz_routine();
+            return nullptr;
+          },
+          nullptr)) {
+    __kmp_fatal(KMP_MSG(CantRegisterNewThread));
+  }
+}
+
+void __kmp_unshackled_threads_initz_wait() {
+  // The initial thread waits here for the initialization to complete. The
+  // condition variable is notified by the master thread of the unshackled team
+  if (pthread_cond_wait(&__kmp_unshackled_threads_initz_cond_var,
+                        &__kmp_unshackled_threads_initz_lock)) {
+    __kmp_fatal(KMP_MSG(CantRegisterNewThread));
+  }
+}
+
+void __kmp_unshackled_initz_release() {
+  // After all initialization, reset __kmp_init_unshackled_threads to FALSE
+  __kmp_init_unshackled_threads = FALSE;
+
+  // Notify the initial thread
+  if (pthread_cond_signal(&__kmp_unshackled_threads_initz_cond_var)) {
+    __kmp_fatal(KMP_MSG(CantRegisterNewThread));
+  }
+}
+
+void __kmp_unshackled_master_thread_wait() {
+  // The master thread of the unshackled team is blocked here. The condition
+  // variable can only be signaled in the destructor of the RTL
+  if (pthread_cond_wait(&__kmp_unshackled_master_thread_cond_var,
+                        &__kmp_unshackled_master_thread_lock)) {
+    __kmp_fatal(KMP_MSG(CantRegisterNewThread));
+  }
+}
+
+void __kmp_unshackled_worker_thread_signal() {
+  if (sem_post(&__kmp_unshackled_task_sem))
+    __kmp_fatal(KMP_MSG(CantRegisterNewThread));
+}
+
+#endif
+
 // end of file //
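Note on the deque selection: __kmp_push_task redirects an unshackled task to the deque of a shadow thread chosen by the KMP_UNSHACKLED_THREAD and KMP_GTID_TO_SHADOW_GTID macros added to kmp.h. Below is a minimal standalone sketch of that arithmetic; the capacity and thread-count constants are made-up placeholders for the runtime globals (__kmp_threads_capacity, __kmp_unshackled_threads_num), and only the mapping itself is taken from the patch.

#include <cstdio>

// Placeholder values standing in for the runtime globals (assumptions).
static const int kThreadsCapacity = 64;     // __kmp_threads_capacity
static const int kUnshackledThreadsNum = 8; // __kmp_unshackled_threads_num

// Mirrors KMP_UNSHACKLED_THREAD: gtids in the doubled upper half of the
// __kmp_threads array belong to unshackled threads.
static bool is_unshackled_gtid(int gtid) { return gtid >= kThreadsCapacity; }

// Mirrors KMP_GTID_TO_SHADOW_GTID: round-robin over the worker slots,
// skipping slot 0, which is reserved for the unshackled master thread.
static int gtid_to_shadow_gtid(int gtid) {
  return gtid % (kUnshackledThreadsNum - 1) + 1;
}

int main() {
  // Each encountering gtid lands on one of the worker slots 1..7, so
  // unshackled tasks pushed by different threads spread across the deques.
  for (int gtid = 0; gtid < 10; ++gtid)
    std::printf("gtid %2d -> shadow slot %d%s\n", gtid,
                gtid_to_shadow_gtid(gtid),
                is_unshackled_gtid(gtid) ? " (unshackled gtid)" : "");
  return 0;
}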
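Note on the initialization handshake: the helpers added to z_Linux_util.cpp block the initial thread in __kmp_unshackled_threads_initz_wait until __kmp_unshackled_initz_release signals the condition variable, but as written they call pthread_cond_wait without initializing or locking the corresponding mutex, which POSIX does not allow. The sketch below shows the same handshake in the conventional form (mutex held around both the wait and the signal, and a predicate guarding against spurious wakeups). All names here are illustrative stand-ins, not runtime symbols; compile with -pthread.

#include <cstdio>
#include <pthread.h>

// Illustrative handshake state (assumed names, not the patch's globals).
static pthread_mutex_t initz_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t initz_cond_var = PTHREAD_COND_INITIALIZER;
static bool initz_done = false; // predicate guarding against spurious wakeups

// Counterpart of __kmp_unshackled_threads_initz_wait: the initial thread
// blocks until the unshackled master thread finishes initialization.
static void initz_wait() {
  pthread_mutex_lock(&initz_lock);
  while (!initz_done)
    pthread_cond_wait(&initz_cond_var, &initz_lock);
  pthread_mutex_unlock(&initz_lock);
}

// Counterpart of __kmp_unshackled_initz_release: signal with the lock held.
static void initz_release() {
  pthread_mutex_lock(&initz_lock);
  initz_done = true;
  pthread_cond_signal(&initz_cond_var);
  pthread_mutex_unlock(&initz_lock);
}

static void *master_thread_fn(void *) {
  // ... unshackled team setup would happen here ...
  initz_release();
  return nullptr;
}

int main() {
  pthread_t handle;
  pthread_create(&handle, nullptr, master_thread_fn, nullptr);
  initz_wait(); // returns only after master_thread_fn has signaled
  std::printf("unshackled team initialized\n");
  pthread_join(handle, nullptr);
  return 0;
}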