Index: runtime/src/kmp_csupport.cpp
===================================================================
--- runtime/src/kmp_csupport.cpp
+++ runtime/src/kmp_csupport.cpp
@@ -3202,6 +3202,43 @@
 #endif // KMP_USE_DYNAMIC_LOCK
 } // __kmp_end_critical_section_reduce_block
 
+#if OMP_40_ENABLED
+// For a reduction at the teams construct, temporarily re-attach 'th' to the
+// parent team; the original team and task state are handed back through
+// 'team_p' and 'task_state'. Returns 1 if the teams were swapped, otherwise 0.
+static __forceinline int
+__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
+                                     int *task_state) {
+  kmp_team_t *cur, *parent;
+  if (!th->th.th_teams_microtask)
+    return 0; // not inside a teams construct
+  *team_p = cur = th->th.th_team; // stored even when no swap follows
+  if (cur->t.t_level != th->th.th_teams_level)
+    return 0; // not a reduction at the teams construct
+
+  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // tid is expected to be 0 here
+  th->th.th_info.ds.ds_tid = cur->t.t_master_tid;
+  parent = cur->t.t_parent;
+  th->th.th_team = parent;
+  th->th.th_team_nproc = parent->t.t_nproc;
+  th->th.th_task_team = parent->t.t_task_team[0];
+  *task_state = th->th.th_task_state; // saved for the later restore
+  th->th.th_task_state = 0;
+  return 1;
+}
+
+// Undo __kmp_swap_teams_for_teams_reduction(): re-attach 'th' to 'team' and
+// reinstate 'task_state'; the thread id within the team is reset to 0.
+static __forceinline void
+__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
+  th->th.th_info.ds.ds_tid = 0;
+  th->th.th_team = team;
+  th->th.th_team_nproc = team->t.t_nproc;
+  th->th.th_task_team = team->t.t_task_team[task_state];
+  th->th.th_task_state = task_state;
+}
+#endif
+
 /* 2.a.i. Reduce Block without a terminating barrier */
 /*!
@ingroup SYNCHRONIZATION @@ -3228,8 +3265,8 @@ int retval = 0; PACKED_REDUCTION_METHOD_T packed_reduction_method; #if OMP_40_ENABLED - kmp_team_t *team; kmp_info_t *th; + kmp_team_t *team; int teams_swapped = 0, task_state; #endif KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); @@ -3254,22 +3291,7 @@ #if OMP_40_ENABLED th = __kmp_thread_from_gtid(global_tid); - if (th->th.th_teams_microtask) { // AC: check if we are inside the teams - // construct? - team = th->th.th_team; - if (team->t.t_level == th->th.th_teams_level) { - // this is reduction at teams construct - KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 - // Let's swap teams temporarily for the reduction barrier - teams_swapped = 1; - th->th.th_info.ds.ds_tid = team->t.t_master_tid; - th->th.th_team = team->t.t_parent; - th->th.th_team_nproc = th->th.th_team->t.t_nproc; - th->th.th_task_team = th->th.th_team->t.t_task_team[0]; - task_state = th->th.th_task_state; - th->th.th_task_state = 0; - } - } + teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); #endif // OMP_40_ENABLED // packed_reduction_method value will be reused by __kmp_end_reduce* function, @@ -3373,12 +3395,7 @@ } #if OMP_40_ENABLED if (teams_swapped) { - // Restore thread structure - th->th.th_info.ds.ds_tid = 0; - th->th.th_team = team; - th->th.th_team_nproc = team->t.t_nproc; - th->th.th_task_team = team->t.t_task_team[task_state]; - th->th.th_task_state = task_state; + __kmp_restore_swapped_teams(th, team, task_state); } #endif KA_TRACE( @@ -3466,6 +3483,11 @@ KMP_COUNT_BLOCK(REDUCE_wait); int retval = 0; PACKED_REDUCTION_METHOD_T packed_reduction_method; +#if OMP_40_ENABLED + kmp_info_t *th; + kmp_team_t *team; + int teams_swapped = 0, task_state; +#endif KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); @@ -3487,6 +3509,11 @@ __kmp_push_sync(global_tid, ct_reduce, loc, NULL); #endif +#if OMP_40_ENABLED + th = 
__kmp_thread_from_gtid(global_tid); + teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); +#endif // OMP_40_ENABLED + packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); @@ -3548,6 +3575,11 @@ // should never reach this block KMP_ASSERT(0); // "unexpected method" } +#if OMP_40_ENABLED + if (teams_swapped) { + __kmp_restore_swapped_teams(th, team, task_state); + } +#endif KA_TRACE(10, ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", @@ -3570,9 +3602,19 @@ kmp_critical_name *lck) { PACKED_REDUCTION_METHOD_T packed_reduction_method; +#if OMP_40_ENABLED + kmp_info_t *th; + kmp_team_t *team; + int teams_swapped = 0, task_state; +#endif KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); +#if OMP_40_ENABLED + th = __kmp_thread_from_gtid(global_tid); + teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); +#endif // OMP_40_ENABLED + packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); // this barrier should be visible to a customer and to the threading profile @@ -3660,6 +3702,11 @@ // should never reach this block KMP_ASSERT(0); // "unexpected method" } +#if OMP_40_ENABLED + if (teams_swapped) { + __kmp_restore_swapped_teams(th, team, task_state); + } +#endif if (__kmp_env_consistency_check) __kmp_pop_sync(global_tid, ct_reduce, loc); Index: runtime/test/misc_bugs/teams-reduction.c =================================================================== --- /dev/null +++ runtime/test/misc_bugs/teams-reduction.c @@ -0,0 +1,65 @@ +// RUN: %libomp-compile-and-run +// +// The test checks the teams construct with reduction executed on the host. 
+//
+
+// NOTE(review): header names were lost in this copy; reconstructed from usage.
+#include <stddef.h> // size_t, NULL
+#include <stdint.h> // int32_t
+#include <stdio.h>  // printf
+
+#ifndef N_TEAMS
+#define N_TEAMS 4
+#endif
+#ifndef N_THR
+#define N_THR 3
+#endif
+
+static int err = 0;
+
+// Internal library stuff to emulate compiler's code generation:
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+  int32_t reserved_1;
+  int32_t flags;
+  int32_t reserved_2;
+  int32_t reserved_3;
+  char const *psource;
+} ident_t;
+
+static ident_t dummy_loc = {0, 2, 0, 0, ";dummyFile;dummyFunc;0;0;;"};
+
+typedef int32_t kmp_critical_name[8];
+kmp_critical_name crit;
+
+int32_t __kmpc_global_thread_num(ident_t *);
+void __kmpc_push_num_teams(ident_t *, int32_t global_tid, int32_t num_teams,
+                           int32_t num_threads);
+void __kmpc_fork_teams(ident_t *, int32_t argc, void *microtask, ...);
+int32_t __kmpc_reduce(ident_t *, int32_t global_tid, int32_t num_vars,
+                      size_t reduce_size, void *reduce_data, void *reduce_func,
+                      kmp_critical_name *lck);
+void __kmpc_end_reduce(ident_t *, int32_t global_tid, kmp_critical_name *lck);
+
+#ifdef __cplusplus
+}
+#endif
+
+// Microtask handed to __kmpc_fork_teams(): runs a reduction with no variables,
+// data or reduce function through __kmpc_reduce()/__kmpc_end_reduce().
+void outlined(int32_t *gtid, int32_t *tid) {
+  (void)__kmpc_reduce(&dummy_loc, *gtid, 0, 0, NULL, NULL, &crit);
+  __kmpc_end_reduce(&dummy_loc, *gtid, &crit);
+}
+
+// Requests N_TEAMS teams of N_THR threads, forks them, and reports success.
+int main(void) {
+  int32_t th = __kmpc_global_thread_num(NULL); // registers initial thread
+  __kmpc_push_num_teams(&dummy_loc, th, N_TEAMS, N_THR);
+  __kmpc_fork_teams(&dummy_loc, 0, (void *)&outlined); // explicit: fn ptr -> void *
+  printf("passed\n"); // Test did not hang -> passed!
+  return err;
+}