diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -397,6 +397,13 @@
     __kmpc_end_scope                            287
 %endif
 
+%ifndef stub
+    __kmpc_copyprivate_light                    288
+    __kmpc_sections_init                        289
+    __kmpc_next_section                         290
+    __kmpc_end_sections                         291
+%endif
+
 # User API entry points that have both lower- and upper- case versions for Fortran.
 # Number for lowercase version is indicated. Number for uppercase is obtained by adding 1000.
 # User API entry points are entry points that start with 'kmp_' or 'omp_'.
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -3890,6 +3890,11 @@
 KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
 KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
 
+KMP_EXPORT kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 global_tid);
+KMP_EXPORT kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 global_tid,
+                                         kmp_int32 numberOfSections);
+KMP_EXPORT void __kmpc_end_sections(ident_t *loc, kmp_int32 global_tid);
+
 KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
                                      kmp_int32 schedtype, kmp_int32 *plastiter,
                                      kmp_int *plower, kmp_int *pupper,
@@ -3903,6 +3908,9 @@
                                    void (*cpy_func)(void *, void *),
                                    kmp_int32 didit);
 
+KMP_EXPORT void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid,
+                                          void *cpy_data);
+
 extern void KMPC_SET_NUM_THREADS(int arg);
 extern void KMPC_SET_DYNAMIC(int flag);
 extern void KMPC_SET_NESTED(int flag);
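For orientation, here is a rough sketch of how a compiler might lower
"#pragma omp sections" onto the three new entry points, pieced together from
the doc comments further down. Illustrative only, not part of the patch:
"loc" and "gtid" stand for the location struct and global thread id the
compiler already materializes, and the construct is assumed to have two
sections.

  if (__kmpc_sections_init(loc, gtid)) {
    // Parallel region is active: threads race to claim section ids.
    kmp_int32 id;
    while ((id = __kmpc_next_section(loc, gtid, 2)) < 2) {
      if (id == 0) { /* ...body of section 0... */
      } else {       /* ...body of section 1... */
      }
    }
  } else {
    // Serialized: this thread executes every section itself.
    /* ...body of section 0... */
    /* ...body of section 1... */
  }
  __kmpc_end_sections(loc, gtid);
  // None of these calls contains an implicit barrier; absent a "nowait"
  // clause the compiler would emit an explicit barrier here.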
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -2224,6 +2224,61 @@
   }
 }
 
+/* --------------------------------------------------------------------------*/
+/*!
+@ingroup THREADPRIVATE
+@param loc source location information
+@param gtid global thread number
+@param cpy_data pointer to the data to be saved/copied or 0
+@return the saved pointer to the data
+
+__kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate: it only
+saves the pointer it is given (if it is non-NULL, i.e. it comes from the
+thread that executed the single region) and returns that pointer from every
+call (the thread that executed single doesn't need it, but gets it anyway).
+This version does not do any actual data copying; the copying has to be done
+somewhere else, e.g. inline in the generated code. For the same reason this
+function, unlike __kmpc_copyprivate, has no barrier at the end, so the
+generated code needs an explicit barrier after all the data has been copied.
+*/
+void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
+  void **data_ptr;
+
+  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
+
+  KMP_MB();
+
+  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
+
+  if (__kmp_env_consistency_check) {
+    if (loc == 0) {
+      KMP_WARNING(ConstructIdentInvalid);
+    }
+  }
+
+  // ToDo: Optimize the following barrier
+
+  if (cpy_data)
+    *data_ptr = cpy_data;
+
+#if OMPT_SUPPORT
+  ompt_frame_t *ompt_frame;
+  if (ompt_enabled.enabled) {
+    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
+    if (ompt_frame->enter_frame.ptr == NULL)
+      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
+    OMPT_STORE_RETURN_ADDRESS(gtid);
+  }
+#endif
+/* This barrier is not a barrier region boundary */
+#if USE_ITT_NOTIFY
+  __kmp_threads[gtid]->th.th_ident = loc;
+#endif
+  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
+
+  return *data_ptr;
+}
+
 /* -------------------------------------------------------------------------- */
 
 #define INIT_LOCK __kmp_init_user_lock_with_checks
@@ -4348,7 +4403,7 @@
 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                   omp_allocator_handle_t free_allocator) {
   return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
-                      free_allocator);
+                       free_allocator);
 }
 
 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
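Because __kmpc_copyprivate_light only broadcasts a pointer, a plausible use
for "single copyprivate(x)" looks like the sketch below. Illustrative only
and deliberately simplified: "x" and "T" are placeholders for the
copyprivate variable and its type, "loc" and "gtid" are assumed available as
above, and __kmpc_single, __kmpc_end_single, and __kmpc_barrier are the
existing entry points declared in kmp.h.

  void *src = NULL;
  if (__kmpc_single(loc, gtid)) {
    /* ...single region body writes x... */
    src = &x; // only the thread that executed single passes a non-NULL value
    __kmpc_end_single(loc, gtid);
  }
  // The runtime's internal barrier makes the saved pointer visible to all
  // threads; no copying and no trailing barrier happen inside the runtime.
  src = __kmpc_copyprivate_light(loc, gtid, src);
  if (src != (void *)&x)
    x = *(T *)src; // the copy itself is emitted inline
  __kmpc_barrier(loc, gtid); // explicit barrier before the source may be reused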
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -2285,6 +2285,219 @@
   return status;
 }
 
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param global_tid global thread number
+@return Zero if the parallel region is not active and this thread should
+execute all sections, non-zero otherwise.
+
+Beginning of the sections construct.
+There are no implicit barriers in the "sections" calls; rather, the compiler
+should introduce an explicit barrier if one is required.
+
+This implementation is based on __kmp_dispatch_init, using the same constructs
+for shared data (sections cannot be nested directly inside an omp for loop;
+there has to be a parallel region in between).
+*/
+kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 gtid) {
+
+  int active;
+  kmp_info_t *th;
+  kmp_team_t *team;
+  kmp_uint32 my_buffer_index;
+  dispatch_shared_info_template<kmp_int32> volatile *sh;
+
+  KMP_DEBUG_ASSERT(__kmp_init_serial);
+
+  if (!TCR_4(__kmp_init_parallel))
+    __kmp_parallel_initialize();
+  __kmp_resume_if_soft_paused();
+
+  /* setup data */
+  th = __kmp_threads[gtid];
+  team = th->th.th_team;
+  active = !team->t.t_serialized;
+  th->th.th_ident = loc;
+
+  KMP_COUNT_BLOCK(OMP_SECTIONS);
+  KD_TRACE(10, ("__kmpc_sections_init: called by T#%d\n", gtid));
+
+  if (active) {
+    // Set up sections in the same way as dynamically scheduled loops.
+    // We need one piece of shared data: which section is to execute next.
+    // (If the parallel region is not active, all sections are executed on the
+    // same thread.)
+    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
+                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
+
+    my_buffer_index = th->th.th_dispatch->th_disp_index++;
+
+    // Reuse the shared data structures from dynamically scheduled loops:
+    sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
+        &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
+    KD_TRACE(10, ("__kmpc_sections_init: T#%d my_buffer_index:%d\n", gtid,
+                  my_buffer_index));
+
+    th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
+    th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
+
+    KD_TRACE(100,
+             ("__kmpc_sections_init: T#%d before wait: my_buffer_index:%d "
+              "sh->buffer_index:%d\n",
+              gtid, my_buffer_index, sh->buffer_index));
+    __kmp_wait<kmp_uint32>(&sh->buffer_index, my_buffer_index,
+                           __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
+    // Note: KMP_WAIT() cannot be used here: buffer index and
+    // my_buffer_index are *always* 32-bit integers.
+    KMP_MB();
+    KD_TRACE(100,
+             ("__kmpc_sections_init: T#%d after wait: my_buffer_index:%d "
+              "sh->buffer_index:%d\n",
+              gtid, my_buffer_index, sh->buffer_index));
+
+    th->th.th_dispatch->th_dispatch_pr_current =
+        nullptr; // sections construct doesn't need private data
+    th->th.th_dispatch->th_dispatch_sh_current =
+        CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
+  }
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+  if (ompt_enabled.ompt_callback_work) {
+    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+    ompt_callbacks.ompt_callback(ompt_callback_work)(
+        ompt_work_sections, ompt_scope_begin, &(team_info->parallel_data),
+        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+  }
+#endif
+  KMP_PUSH_PARTITIONED_TIMER(OMP_sections);
+
+  return active;
+}
+
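The scheme that __kmpc_sections_init sets up, and that __kmpc_next_section
below consumes, boils down to one shared counter which threads increment to
claim section ids. A self-contained model of it (not runtime code), with
std::atomic standing in for the runtime's test_then_inc:

  #include <atomic>
  #include <cstdio>
  #include <thread>
  #include <vector>

  int main() {
    const int numberOfSections = 5;
    std::atomic<int> iteration{0}; // plays the role of sh->u.s.iteration
    auto worker = [&](int tid) {
      int id;
      // fetch_add returns the pre-increment value, like test_then_inc;
      // values >= numberOfSections mean nothing is left to execute.
      while ((id = iteration.fetch_add(1)) < numberOfSections)
        std::printf("thread %d runs section %d\n", tid, id);
    };
    std::vector<std::thread> team;
    for (int t = 0; t < 4; ++t)
      team.emplace_back(worker, t);
    for (auto &th : team)
      th.join();
  }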
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param global_tid global thread number
+@param numberOfSections number of sections in the 'sections' construct
+@return unsigned value in [0, numberOfSections) - the id of the section to
+execute next on this thread; numberOfSections (or any other value out of
+range) - nothing left to execute on this thread
+*/
+kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 gtid,
+                              kmp_int32 numberOfSections) {
+
+  KMP_TIME_PARTITIONED_BLOCK(OMP_sections);
+
+  kmp_info_t *th = __kmp_threads[gtid];
+#ifdef KMP_DEBUG
+  kmp_team_t *team = th->th.th_team;
+#endif
+
+  KD_TRACE(1000, ("__kmpc_next_section: T#%d; number of sections:%d\n", gtid,
+                  numberOfSections));
+
+  // For the serialized case this function should not be called:
+  KMP_DEBUG_ASSERT(!team->t.t_serialized);
+
+  dispatch_shared_info_template<kmp_int32> volatile *sh;
+
+  KMP_DEBUG_ASSERT(th->th.th_dispatch ==
+                   &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
+
+  KMP_DEBUG_ASSERT(!(th->th.th_dispatch->th_dispatch_pr_current));
+  sh = reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
+      th->th.th_dispatch->th_dispatch_sh_current);
+  KMP_DEBUG_ASSERT(sh);
+
+  kmp_int32 sectionIndex = 0;
+  bool moreSectionsToExecute = true;
+
+  // Find the section to execute:
+  sectionIndex = test_then_inc<kmp_int32>((kmp_int32 *)&sh->u.s.iteration);
+  if (sectionIndex >= numberOfSections) {
+    moreSectionsToExecute = false;
+  }
+
+  // No more sections to execute: release the buffer and finalize dispatch.
+  // OMPTODO: could __kmpc_end_sections be bypassed?
+  if (!moreSectionsToExecute) {
+    kmp_int32 num_done;
+
+    num_done = test_then_inc<kmp_int32>((kmp_int32 *)(&sh->u.s.num_done));
+
+    if (num_done == th->th.th_team_nproc - 1) {
+      /* NOTE: release this buffer to be reused */
+
+      KMP_MB(); /* Flush all pending memory write invalidates. */
+
+      sh->u.s.num_done = 0;
+      sh->u.s.iteration = 0;
+
+      KMP_MB(); /* Flush all pending memory write invalidates. */
+
+      sh->buffer_index += __kmp_dispatch_num_buffers;
+      KD_TRACE(100, ("__kmpc_next_section: T#%d change buffer_index:%d\n",
+                     gtid, sh->buffer_index));
+
+      KMP_MB(); /* Flush all pending memory write invalidates. */
+
+    } // if
+
+    th->th.th_dispatch->th_deo_fcn = NULL;
+    th->th.th_dispatch->th_dxo_fcn = NULL;
+    th->th.th_dispatch->th_dispatch_sh_current = NULL;
+    th->th.th_dispatch->th_dispatch_pr_current = NULL;
+
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.ompt_callback_dispatch) {
+      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+      ompt_data_t instance = ompt_data_none;
+      instance.ptr = OMPT_GET_RETURN_ADDRESS(0);
+      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
+          &(team_info->parallel_data), &(task_info->task_data),
+          ompt_dispatch_section, instance);
+    }
+#endif
+    KMP_POP_PARTITIONED_TIMER();
+  }
+
+  return sectionIndex;
+}
+
+/*!
+@ingroup WORK_SHARING
+@param loc source location information
+@param global_tid global thread number
+
+End of the "sections" construct.
+There is no need to wait here: the barrier is added separately when needed.
+*/
+void __kmpc_end_sections(ident_t *loc, kmp_int32 gtid) {
+
+  kmp_info_t *th = __kmp_threads[gtid];
+  int active = !th->th.th_team->t.t_serialized;
+
+  KD_TRACE(100, ("__kmpc_end_sections: T#%d called\n", gtid));
+
+  if (!active) {
+    // In the active case this finalization is done in __kmpc_next_section.
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+    if (ompt_enabled.ompt_callback_work) {
+      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
+      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
+      ompt_callbacks.ompt_callback(ompt_callback_work)(
+          ompt_work_sections, ompt_scope_end, &(team_info->parallel_data),
+          &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
+    }
+#endif
+    KMP_POP_PARTITIONED_TIMER();
+  }
+
+  KD_TRACE(100, ("__kmpc_end_sections: T#%d returned\n", gtid));
+}
+
 template <typename T>
 static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *plastiter, T *plower, T *pupper,
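Finally, a minimal user-level smoke test for the construct these entry points
implement (illustrative, not taken from the patch's test suite); compile with
-fopenmp. Which section lands on which thread is up to the runtime.

  #include <omp.h>
  #include <stdio.h>

  int main(void) {
  #pragma omp parallel sections num_threads(2)
    {
  #pragma omp section
      printf("section 0 on thread %d\n", omp_get_thread_num());
  #pragma omp section
      printf("section 1 on thread %d\n", omp_get_thread_num());
    }
    return 0;
  }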