diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports --- a/openmp/runtime/src/dllexports +++ b/openmp/runtime/src/dllexports @@ -360,6 +360,7 @@ __kmpc_team_static_init_4u 256 __kmpc_team_static_init_8 257 __kmpc_team_static_init_8u 258 + __kmpc_push_num_teams_51 284 %endif %ifndef stub diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt --- a/openmp/runtime/src/i18n/en_US.txt +++ b/openmp/runtime/src/i18n/en_US.txt @@ -455,6 +455,7 @@ AffUseGlobCpuidL "%1$s: Affinity capable, using global cpuid leaf %2$d info" AffNotCapableUseLocCpuidL "%1$s: Affinity not capable, using local cpuid leaf %2$d info" AffNotUsingHwloc "%1$s: Affinity not capable, using hwloc." +FailedToCreateTeam "Failed to create teams between lower bound (%1$d) and upper bound (%2$d)." # -------------------------------------------------------------------------------------------------- -*- HINTS -*- @@ -512,7 +513,7 @@ "a file for another architecture. " "Check whether \"%1$s\" is a file for %2$s architecture." SystemLimitOnThreads "System-related limit on the number of threads." - +SetNewBound "Try setting new bounds (preferably less than or equal to %1$d) for num_teams clause." # -------------------------------------------------------------------------------------------------- diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -3363,6 +3363,8 @@ kmp_proc_bind_t proc_bind); extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams, int num_threads); +extern void __kmp_push_num_teams_51(ident_t *loc, int gtid, int num_teams_lb, + int num_teams_ub, int num_threads); extern void __kmp_yield(); @@ -3921,6 +3923,11 @@ KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads); +/* Function for OpenMP 5.1 num_teams clause */ +KMP_EXPORT void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid, + kmp_int32 num_teams_lb, + kmp_int32 num_teams_ub, + kmp_int32 num_threads); KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...); struct kmp_dim { // loop bounds info casted to kmp_int64 diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -351,6 +351,33 @@ __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); } +/*! +@ingroup PARALLEL +@param loc source location information +@param global_tid global thread number +@param num_teams_lo lower bound on number of teams requested for the teams +construct +@param num_teams_up upper bound on number of teams requested for the teams +construct +@param num_threads number of threads per team requested for the teams construct + +Set the number of teams to be used by the teams construct. The number of initial +teams cretaed will be greater than or equal to the lower bound and less than or +equal to the upper bound. +This call is only required if the teams construct has a `num_teams` clause +or a `thread_limit` clause (or both). +*/ +void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid, + kmp_int32 num_teams_lb, kmp_int32 num_teams_ub, + kmp_int32 num_threads) { + KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d" + " num_teams_ub=%d num_threads=%d\n", + global_tid, num_teams_lb, num_teams_ub, num_threads)); + __kmp_assert_valid_gtid(global_tid); + __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub, + num_threads); +} + /*! @ingroup PARALLEL @param loc source location information diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -7425,39 +7425,15 @@ thr->th.th_set_nproc = num_threads; } -/* this sets the requested number of teams for the teams region and/or - the number of threads for the next parallel region encountered */ -void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams, - int num_threads) { - kmp_info_t *thr = __kmp_threads[gtid]; - KMP_DEBUG_ASSERT(num_teams >= 0); - KMP_DEBUG_ASSERT(num_threads >= 0); - - if (num_teams == 0) { - if (__kmp_nteams > 0) { - num_teams = __kmp_nteams; - } else { - num_teams = 1; // default number of teams is 1. - } - } - if (num_teams > __kmp_teams_max_nth) { // if too many teams requested? - if (!__kmp_reserve_warn) { - __kmp_reserve_warn = 1; - __kmp_msg(kmp_ms_warning, - KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth), - KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); - } - num_teams = __kmp_teams_max_nth; - } - // Set number of teams (number of threads in the outer "parallel" of the - // teams) - thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; - +static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams, + int num_threads) { + KMP_DEBUG_ASSERT(thr); // Remember the number of threads for inner parallel regions if (!TCR_4(__kmp_init_middle)) __kmp_middle_initialize(); // get internal globals calculated KMP_DEBUG_ASSERT(__kmp_avail_proc); KMP_DEBUG_ASSERT(__kmp_dflt_team_nth); + if (num_threads == 0) { if (__kmp_teams_thread_limit > 0) { num_threads = __kmp_teams_thread_limit; @@ -7476,6 +7452,9 @@ if (num_teams * num_threads > __kmp_teams_max_nth) { num_threads = __kmp_teams_max_nth / num_teams; } + if (num_threads == 0) { + num_threads = 1; + } } else { // This thread will be the master of the league masters // Store new thread limit; old limit is saved in th_cg_roots list @@ -7486,11 +7465,16 @@ } if (num_teams * num_threads > __kmp_teams_max_nth) { int new_threads = __kmp_teams_max_nth / num_teams; - if (!__kmp_reserve_warn) { // user asked for too many threads - __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT - __kmp_msg(kmp_ms_warning, - KMP_MSG(CantFormThrTeam, num_threads, new_threads), - KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); + if (new_threads == 0) { + new_threads = 1; + } + if (new_threads != num_threads) { + if (!__kmp_reserve_warn) { // user asked for too many threads + __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT + __kmp_msg(kmp_ms_warning, + KMP_MSG(CantFormThrTeam, num_threads, new_threads), + KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); + } } num_threads = new_threads; } @@ -7498,6 +7482,94 @@ thr->th.th_teams_size.nth = num_threads; } +/* this sets the requested number of teams for the teams region and/or + the number of threads for the next parallel region encountered */ +void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams, + int num_threads) { + kmp_info_t *thr = __kmp_threads[gtid]; + KMP_DEBUG_ASSERT(num_teams >= 0); + KMP_DEBUG_ASSERT(num_threads >= 0); + + if (num_teams == 0) { + if (__kmp_nteams > 0) { + num_teams = __kmp_nteams; + } else { + num_teams = 1; // default number of teams is 1. + } + } + if (num_teams > __kmp_teams_max_nth) { // if too many teams requested? + if (!__kmp_reserve_warn) { + __kmp_reserve_warn = 1; + __kmp_msg(kmp_ms_warning, + KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth), + KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); + } + num_teams = __kmp_teams_max_nth; + } + // Set number of teams (number of threads in the outer "parallel" of the + // teams) + thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; + + __kmp_push_thread_limit(thr, num_teams, num_threads); +} + +/* This sets the requested number of teams for the teams region and/or + the number of threads for the next parallel region encountered */ +void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb, + int num_teams_ub, int num_threads) { + kmp_info_t *thr = __kmp_threads[gtid]; + KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0); + KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb); + KMP_DEBUG_ASSERT(num_threads >= 0); + + if (num_teams_lb > num_teams_ub) { + __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub), + KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null); + } + + int num_teams = 1; // defalt number of teams is 1. + + if (num_teams_lb == 0 && num_teams_ub > 0) + num_teams_lb = num_teams_ub; + + if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause + num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams; + if (num_teams > __kmp_teams_max_nth) { + if (!__kmp_reserve_warn) { + __kmp_reserve_warn = 1; + __kmp_msg(kmp_ms_warning, + KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth), + KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); + } + num_teams = __kmp_teams_max_nth; + } + } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams + num_teams = num_teams_ub; + } else { // num_teams_lb <= num_teams <= num_teams_ub + if (num_threads == 0) { + if (num_teams_ub > __kmp_teams_max_nth) { + num_teams = num_teams_lb; + } else { + num_teams = num_teams_ub; + } + } else { + num_teams = (num_threads > __kmp_teams_max_nth) + ? num_teams + : __kmp_teams_max_nth / num_threads; + if (num_teams < num_teams_lb) { + num_teams = num_teams_lb; + } else if (num_teams > num_teams_ub) { + num_teams = num_teams_ub; + } + } + } + // Set number of teams (number of threads in the outer "parallel" of the + // teams) + thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; + + __kmp_push_thread_limit(thr, num_teams, num_threads); +} + // Set the proc_bind var to use in the following parallel region. void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) { kmp_info_t *thr = __kmp_threads[gtid]; diff --git a/openmp/runtime/test/teams/kmp_num_teams.c b/openmp/runtime/test/teams/kmp_num_teams.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/teams/kmp_num_teams.c @@ -0,0 +1,93 @@ +// RUN: %libomp-compile-and-run +// UNSUPPORTED: gcc + +#include +#include +#include + +#define NT 8 + +#ifdef __cplusplus +extern "C" { +#endif +typedef int kmp_int32; +typedef struct ident { + kmp_int32 reserved_1; + kmp_int32 flags; + kmp_int32 reserved_2; + kmp_int32 reserved_3; + char const *psource; +} ident_t; +extern int __kmpc_global_thread_num(ident_t *); +extern void __kmpc_push_num_teams_51(ident_t *, kmp_int32, kmp_int32, kmp_int32, + kmp_int32); +#ifdef __cplusplus +} +#endif + +void check_num_teams(int num_teams_lb, int num_teams_ub, int thread_limit) { + int nteams, nthreads; + int a = 0; + + int gtid = __kmpc_global_thread_num(NULL); + __kmpc_push_num_teams_51(NULL, gtid, num_teams_lb, num_teams_ub, + thread_limit); + +#pragma omp target teams + { + int priv_nteams; + int team_num = omp_get_team_num(); + if (team_num == 0) + nteams = omp_get_num_teams(); + priv_nteams = omp_get_num_teams(); +#pragma omp parallel + { + int priv_nthreads; + int thread_num = omp_get_thread_num(); + int teams_ub, teams_lb, thr_limit; + if (team_num == 0 && thread_num == 0) + nthreads = omp_get_num_threads(); + priv_nthreads = omp_get_num_threads(); + + teams_ub = (num_teams_ub ? num_teams_ub : priv_nteams); + teams_lb = (num_teams_lb ? num_teams_lb : teams_ub); + thr_limit = (thread_limit ? thread_limit : priv_nthreads); + + if (priv_nteams < teams_lb || priv_nteams > teams_ub) { + fprintf(stderr, "error: invalid number of teams=%d\n", priv_nteams); + exit(1); + } + if (priv_nthreads > thr_limit) { + fprintf(stderr, "error: invalid number of threads=%d\n", priv_nthreads); + exit(1); + } +#pragma omp atomic + a++; + } + } + if (a != nteams * nthreads) { + fprintf(stderr, "error: a (%d) != nteams * nthreads (%d)\n", a, + nteams * nthreads); + exit(1); + } else { + printf("#teams %d, #threads %d: Hello!\n", nteams, nthreads); + } +} + +int main(int argc, char *argv[]) { + omp_set_num_threads(NT); + + check_num_teams(1, 8, 2); + check_num_teams(2, 2, 2); + check_num_teams(2, 2, 0); + check_num_teams(8, 16, 2); + check_num_teams(9, 16, 0); + check_num_teams(9, 16, 2); + check_num_teams(2, 3, 0); + check_num_teams(0, 0, 2); + check_num_teams(0, 4, 0); + check_num_teams(0, 2, 2); + + printf("Test Passed\n"); + return 0; +}