diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -110,6 +110,10 @@
 # For Windows, there is a resource file (.rc -> .res) that is also compiled
 libomp_append(LIBOMP_SOURCE_FILES libomp.rc WIN32)
 
+if(${LIBOMP_UNSHACKLED_TASK_SUPPORT})
+  add_definitions(-DUNSHACKLED_TASK_SUPPORT)
+endif()
+
 # Get compiler and assembler flags
 libomp_get_cxxflags(LIBOMP_CONFIGURED_CXXFLAGS)
 libomp_get_asmflags(LIBOMP_CONFIGURED_ASMFLAGS)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2239,7 +2239,8 @@
   unsigned priority_specified : 1; /* set if the compiler provides priority
                                       setting for the task */
   unsigned detachable : 1; /* 1 == can detach */
-  unsigned reserved : 9; /* reserved for compiler use */
+  unsigned unshackled : 1; /* 1 == unshackled task */
+  unsigned reserved : 8; /* reserved for compiler use */
 
   /* Library flags */ /* Total library flags must be 16 bits */
   unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
@@ -2822,6 +2823,7 @@
 extern volatile int __kmp_init_monitor;
 #endif
 extern volatile int __kmp_init_user_locks;
+extern volatile int __kmp_init_unshackled_threads;
 extern int __kmp_init_counter;
 extern int __kmp_root_counter;
 extern int __kmp_version;
@@ -3052,7 +3054,13 @@
 
 static inline bool KMP_UBER_GTID(int gtid) {
   KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);
+#if UNSHACKLED_TASK_SUPPORT
+  // The thread arrays are doubled and unshackled threads keep gtids in the
+  // upper half for their whole lifetime, not only during initialization
+  KMP_DEBUG_ASSERT(gtid < 2 * __kmp_threads_capacity);
+#else
   KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);
+#endif
   return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&
           __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);
 }
@@ -3910,6 +3918,22 @@
 
 extern void __kmp_omp_display_env(int verbose);
 
+#if UNSHACKLED_TASK_SUPPORT
+// Master thread of unshackled team
+extern kmp_info_t *__kmp_unshackled_master_thread;
+extern int __kmp_unshackled_threads_num;
+extern void __kmp_unshackled_threads_initz_routine();
+extern void __kmp_initialize_unshackled_threads();
+extern void __kmp_do_initialize_unshackled_threads();
+extern void __kmp_unshackled_threads_initz_wait();
+extern void __kmp_unshackled_initz_release();
+extern void __kmp_unshackled_master_thread_wait();
+extern void __kmp_unshackled_worker_thread_wait();
+extern void __kmp_unshackled_worker_thread_signal();
+
+#define IS_UNSHACKLED_THREAD(gtid) ((gtid) >= __kmp_threads_capacity)
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -51,6 +51,9 @@
     0; /* 1 - launched, 2 - actually started (Windows* OS only) */
 #endif
 volatile int __kmp_init_user_locks = FALSE;
+// Defined unconditionally: kmp.h declares it and kmp_runtime.cpp reads it
+// regardless of UNSHACKLED_TASK_SUPPORT
+volatile int __kmp_init_unshackled_threads = FALSE;
 
 /* list of address of allocated caches for commons */
 kmp_cached_addr_t *__kmp_threadpriv_cache_list = NULL;
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -3611,6 +3611,12 @@
         serial initialization may be not a real initial thread).
   */
   capacity = __kmp_threads_capacity;
+#if UNSHACKLED_TASK_SUPPORT
+  if (__kmp_init_unshackled_threads) {
+    // The capacity doubles if we have unshackled threads
+    capacity *= 2;
+  }
+#endif
   if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
     --capacity;
   }
@@ -3627,15 +3633,27 @@
     }
   }
 
-  /* find an available thread slot */
-  /* Don't reassign the zero slot since we need that to only be used by initial
-     thread */
-  for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
-       gtid++)
-    ;
-  KA_TRACE(1,
-           ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
-  KMP_ASSERT(gtid < __kmp_threads_capacity);
+  if (!__kmp_init_unshackled_threads) {
+    /* find an available thread slot */
+    /* Don't reassign the zero slot since we need that to only be used by
+       initial thread */
+    for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
+         gtid++)
+      ;
+    KA_TRACE(
+        1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
+    KMP_ASSERT(gtid < __kmp_threads_capacity);
+  } else {
+    /* find an available thread slot */
+    /* Unshackled roots are registered in the upper half of the doubled
+       array, which starts right after the regular slots */
+    for (gtid = __kmp_threads_capacity; TCR_PTR(__kmp_threads[gtid]) != NULL;
+         gtid++)
+      ;
+    KA_TRACE(
+        1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
+    KMP_ASSERT(gtid < 2 * __kmp_threads_capacity);
+  }
 
   /* update global accounting */
   __kmp_all_nth++;
@@ -4292,8 +4310,17 @@
 #endif
 
   KMP_MB();
-  for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
-    KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
+  // If we're initializing the unshackled threads, the start point is at the end
+  // of regular threads array, a.k.a the start of unshackled threads array
+  if (__kmp_init_unshackled_threads) {
+    for (new_gtid = __kmp_threads_capacity;
+         TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
+      KMP_DEBUG_ASSERT(new_gtid < 2 * __kmp_threads_capacity);
+    }
+  } else {
+    for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
+      KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
+    }
   }
 
   /* allocate space for it. */
@@ -6677,9 +6704,16 @@
   size =
       (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
       CACHE_LINE;
+#if UNSHACKLED_TASK_SUPPORT
+  size *= 2;
+#endif
   __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
   __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                                sizeof(kmp_info_t *) * __kmp_threads_capacity);
+#if UNSHACKLED_TASK_SUPPORT
+  __kmp_root = (kmp_root_t **)((char *)__kmp_root +
+                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
+#endif
 
   /* init thread counts */
   KMP_DEBUG_ASSERT(__kmp_all_nth ==
@@ -6951,6 +6985,10 @@
 
   KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
   __kmp_release_bootstrap_lock(&__kmp_initz_lock);
+
+#if UNSHACKLED_TASK_SUPPORT
+  __kmp_initialize_unshackled_threads();
+#endif
 }
 
 /* ------------------------------------------------------------------------ */
@@ -8297,7 +8335,6 @@
   }
 }
 
-
 void __kmp_omp_display_env(int verbose) {
   __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
   if (__kmp_init_serial == 0)
@@ -8305,3 +8342,46 @@
   __kmp_display_env_impl(!verbose, verbose);
   __kmp_release_bootstrap_lock(&__kmp_initz_lock);
 }
+
+#if UNSHACKLED_TASK_SUPPORT
+kmp_info_t *__kmp_unshackled_master_thread;
+int __kmp_unshackled_threads_num;
+
+namespace {
+void __kmp_unshackled_wrapper_fn(int *gtid, int *, ...) {
+  // If master thread, then wait for signal
+  if (__kmpc_master(nullptr, *gtid)) {
+    // First, release the initial thread; the release routine is also what
+    // clears __kmp_init_unshackled_threads
+    __kmp_unshackled_initz_release();
+    __kmp_unshackled_master_thread_wait();
+  }
+}
+} // namespace
+
+void __kmp_unshackled_threads_initz_routine() {
+  kmp_info_t *master_thread = nullptr;
+
+  // Create a new root for unshackled team/threads
+  const int gtid = __kmp_register_root(TRUE);
+  __kmp_unshackled_master_thread = master_thread = __kmp_threads[gtid];
+
+  // TODO: Determine how many unshackled threads
+  __kmp_unshackled_threads_num = 8;
+  master_thread->th.th_set_nproc = __kmp_unshackled_threads_num;
+
+  __kmpc_fork_call(nullptr, 0, __kmp_unshackled_wrapper_fn);
+}
+
+void __kmp_initialize_unshackled_threads() {
+  // Set the global variable indicating that we're initializing unshackled
+  // team/threads
+  __kmp_init_unshackled_threads = TRUE;
+
+  __kmp_do_initialize_unshackled_threads();
+
+  // Wait here for the finish of initialization of unshackled teams
+  __kmp_unshackled_threads_initz_wait();
+}
+
+#endif
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -324,6 +324,16 @@
 static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+
+#if UNSHACKLED_TASK_SUPPORT
+  // If the task is unshackled, we always push it into the master thread of
+  // unshackled team, and update gtid to the gtid of the master thread
+  if (taskdata->td_flags.unshackled) {
+    thread = __kmp_unshackled_master_thread;
+    gtid = thread->th.th_info.ds.ds_gtid;
+  }
+#endif
+
   kmp_task_team_t *task_team = thread->th.th_task_team;
   kmp_int32 tid = __kmp_tid_from_gtid(gtid);
   kmp_thread_data_t *thread_data;
@@ -424,6 +434,13 @@
 
   __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
 
+#if UNSHACKLED_TASK_SUPPORT
+  // Signal one worker thread to execute the task
+  if (taskdata->td_flags.unshackled) {
+    __kmp_unshackled_worker_thread_signal();
+  }
+#endif
+
   return TASK_SUCCESSFULLY_PUSHED;
 }
 
@@ -1165,6 +1182,19 @@
   if (!TCR_4(__kmp_init_middle))
     __kmp_middle_initialize();
 
+#if UNSHACKLED_TASK_SUPPORT
+  if (flags->unshackled) {
+    // Since unshackled threads are allocated via __kmpc_fork_call, we need to
+    // initialize parallel correspondingly
+    if (!TCR_4(__kmp_init_parallel)) {
+      __kmp_parallel_initialize();
+    }
+    thread = __kmp_unshackled_master_thread;
+    team = thread->th.th_team;
+    parent_task = thread->th.th_current_task;
+  }
+#endif
+
   KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
@@ -1288,6 +1318,7 @@
   taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
   taskdata->td_flags.proxy = flags->proxy;
   taskdata->td_flags.detachable = flags->detachable;
+  taskdata->td_flags.unshackled = flags->unshackled;
   taskdata->td_task_team = thread->th.th_task_team;
   taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
   taskdata->td_flags.tasktype = TASK_EXPLICIT;
diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h
--- a/openmp/runtime/src/kmp_wait_release.h
+++ b/openmp/runtime/src/kmp_wait_release.h
@@ -381,6 +381,13 @@
       break;
     }
 
+#if UNSHACKLED_TASK_SUPPORT
+    if (IS_UNSHACKLED_THREAD(th_gtid)) {
+      __kmp_unshackled_worker_thread_wait();
+      continue;
+    }
+#endif
+
     // Don't suspend if KMP_BLOCKTIME is set to "infinite"
     if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
         __kmp_pause_status != kmp_soft_paused)
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -2439,7 +2439,7 @@
                            ,
                            void **exit_frame_ptr
 #endif
-                           ) {
+) {
 #if OMPT_SUPPORT
   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
 #endif
@@ -2518,4 +2518,120 @@
 
 #endif
 
+#if UNSHACKLED_TASK_SUPPORT
+
+namespace {
+pthread_t __kmp_unshackled_master_thread_handle;
+
+// Wake-ups for unshackled workers. The counter remembers signals sent while
+// no worker was waiting, so a pushed task cannot be missed
+pthread_cond_t __kmp_unshackled_worker_thread_cond_var =
+    PTHREAD_COND_INITIALIZER;
+pthread_mutex_t __kmp_unshackled_threads_wait_lock =
+    PTHREAD_MUTEX_INITIALIZER;
+int __kmp_unshackled_pending_signals = 0;
+
+// Condition variable for initializing unshackled team. Its predicate is
+// __kmp_init_unshackled_threads, which is cleared while holding the lock
+pthread_cond_t __kmp_unshackled_threads_initz_cond_var =
+    PTHREAD_COND_INITIALIZER;
+pthread_mutex_t __kmp_unshackled_threads_initz_lock =
+    PTHREAD_MUTEX_INITIALIZER;
+
+// Condition variable for the wrapper function of master thread
+pthread_cond_t __kmp_unshackled_master_thread_cond_var =
+    PTHREAD_COND_INITIALIZER;
+pthread_mutex_t __kmp_unshackled_master_thread_lock =
+    PTHREAD_MUTEX_INITIALIZER;
+// TODO: runtime shutdown has to set this under the lock above, then signal
+bool __kmp_unshackled_master_thread_released = false;
+
+// Lock a mutex; a failing pthread call is fatal
+void __kmp_unshackled_lock(pthread_mutex_t *mutex) {
+  int status = pthread_mutex_lock(mutex);
+  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
+}
+
+// Unlock a mutex; a failing pthread call is fatal
+void __kmp_unshackled_unlock(pthread_mutex_t *mutex) {
+  int status = pthread_mutex_unlock(mutex);
+  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
+}
+
+} // namespace
+
+void __kmp_unshackled_worker_thread_wait() {
+  __kmp_unshackled_lock(&__kmp_unshackled_threads_wait_lock);
+  // pthread_cond_wait requires the mutex to be held, and wake-ups may be
+  // spurious, so re-check the predicate in a loop
+  while (__kmp_unshackled_pending_signals == 0) {
+    int status = pthread_cond_wait(&__kmp_unshackled_worker_thread_cond_var,
+                                   &__kmp_unshackled_threads_wait_lock);
+    KMP_CHECK_SYSFAIL("pthread_cond_wait", status);
+  }
+  --__kmp_unshackled_pending_signals;
+  __kmp_unshackled_unlock(&__kmp_unshackled_threads_wait_lock);
+}
+
+void __kmp_do_initialize_unshackled_threads() {
+  // The mutexes and condition variables are statically initialized, so all
+  // that is left is creating the thread that finishes the initialization
+  int status = pthread_create(
+      &__kmp_unshackled_master_thread_handle, nullptr,
+      [](void *) -> void * {
+        __kmp_unshackled_threads_initz_routine();
+        return nullptr; // a thread start routine must return a value
+      },
+      nullptr);
+  KMP_CHECK_SYSFAIL("pthread_create", status);
+}
+
+void __kmp_unshackled_threads_initz_wait() {
+  // Initial thread waits here for the completion of the initialization. The
+  // condition variable will be notified by master thread of unshackled teams
+  __kmp_unshackled_lock(&__kmp_unshackled_threads_initz_lock);
+  while (__kmp_init_unshackled_threads) {
+    int status = pthread_cond_wait(&__kmp_unshackled_threads_initz_cond_var,
+                                   &__kmp_unshackled_threads_initz_lock);
+    KMP_CHECK_SYSFAIL("pthread_cond_wait", status);
+  }
+  __kmp_unshackled_unlock(&__kmp_unshackled_threads_initz_lock);
+}
+
+void __kmp_unshackled_initz_release() {
+  // After all initialization, reset __kmp_init_unshackled_threads to false.
+  // The store is made under the lock so the waiting thread cannot miss it
+  __kmp_unshackled_lock(&__kmp_unshackled_threads_initz_lock);
+  __kmp_init_unshackled_threads = FALSE;
+  __kmp_unshackled_unlock(&__kmp_unshackled_threads_initz_lock);
+
+  // Notify the initial thread
+  int status = pthread_cond_signal(&__kmp_unshackled_threads_initz_cond_var);
+  KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
+}
+
+void __kmp_unshackled_master_thread_wait() {
+  // The master thread of unshackled team will be blocked here. The
+  // condition variable can only be signaled in the destructor of RTL
+  __kmp_unshackled_lock(&__kmp_unshackled_master_thread_lock);
+  while (!__kmp_unshackled_master_thread_released) {
+    int status = pthread_cond_wait(&__kmp_unshackled_master_thread_cond_var,
+                                   &__kmp_unshackled_master_thread_lock);
+    KMP_CHECK_SYSFAIL("pthread_cond_wait", status);
+  }
+  __kmp_unshackled_unlock(&__kmp_unshackled_master_thread_lock);
+}
+
+void __kmp_unshackled_worker_thread_signal() {
+  // Record the wake-up before signaling so it is kept when nobody waits yet
+  __kmp_unshackled_lock(&__kmp_unshackled_threads_wait_lock);
+  ++__kmp_unshackled_pending_signals;
+  __kmp_unshackled_unlock(&__kmp_unshackled_threads_wait_lock);
+
+  int status = pthread_cond_signal(&__kmp_unshackled_worker_thread_cond_var);
+  KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
+}
+
+#endif
+
 // end of file //